Mise à jour terminée. Pour connaître les apports de la version 13.8.4 par rapport à notre ancienne version vous pouvez lire les "Release Notes" suivantes :
https://about.gitlab.com/releases/2021/02/11/security-release-gitlab-13-8-4-released/
https://about.gitlab.com/releases/2021/02/05/gitlab-13-8-3-released/

FQuickSortMpi.hpp 24.7 KB
Newer Older
1
// See LICENCE file at project root
2 3 4 5 6
#ifndef FQUICKSORTMPI_HPP
#define FQUICKSORTMPI_HPP

#include "FQuickSort.hpp"
#include "FMpi.hpp"
7
#include "FLog.hpp"
8
#include "FAssert.hpp"
BRAMAS Berenger's avatar
BRAMAS Berenger committed
9
#include "FEnv.hpp"
10

11
#include <memory>
12
#include <utility>
13

14 15
template <class SortType, class CompareType, class IndexType = size_t>
class FQuickSortMpi : public FQuickSort< SortType, IndexType> {
BRAMAS Berenger's avatar
BRAMAS Berenger committed
16 17 18 19
#ifdef SCALFMM_USE_LOG
    static const bool VerboseLog;
#endif

20 21 22 23 24 25 26 27 28 29 30 31 32 33
    // We need a structure see the algorithm detail to know more
    struct Partition{
        IndexType lowerPart;
        IndexType greaterPart;
    };

    struct PackData {
        int idProc;
        IndexType fromElement;
        IndexType toElement;
    };


    static void Swap(SortType& value, SortType& other){
34 35 36
        const SortType temp = std::move(value);
        value = std::move(other);
        other = std::move(temp);
37
    }
38

39 40 41 42 43 44 45
    /* A local iteration of qs */
    static IndexType QsPartition(SortType array[], IndexType left, IndexType right, const CompareType& pivot){
        IndexType idx = left;
        while( idx <= right && CompareType(array[idx]) <= pivot){
            idx += 1;
        }
        left = idx;
46

47 48 49 50 51 52
        for( ; idx <= right ; ++idx){
            if( CompareType(array[idx]) <= pivot ){
                Swap(array[idx],array[left]);
                left += 1;
            }
        }
53

54 55
        return left;
    }
56

57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
    static std::vector<PackData> Distribute(const int currentRank, const int currentNbProcs ,
                                            const Partition globalElementBalance[], const Partition globalElementBalanceSum[],
                                            const int procInTheMiddle, const bool inFromRightToLeft){
        // First agree on who send and who recv
        const int firstProcToSend = (inFromRightToLeft ? procInTheMiddle+1 : 0);
        const int lastProcToSend  = (inFromRightToLeft ? currentNbProcs : procInTheMiddle+1);
        const int firstProcToRecv = (inFromRightToLeft ? 0 : procInTheMiddle+1);
        const int lastProcToRecv  = (inFromRightToLeft ? procInTheMiddle+1 : currentNbProcs);
        // Get the number of element depending on the lower or greater send/recv
        const IndexType totalElementToProceed = (inFromRightToLeft ?
                                                     globalElementBalanceSum[lastProcToSend].lowerPart - globalElementBalanceSum[firstProcToSend].lowerPart :
                                                     globalElementBalanceSum[lastProcToSend].greaterPart - globalElementBalanceSum[firstProcToSend].greaterPart);
        const IndexType totalElementAlreadyOwned = (inFromRightToLeft ?
                                                        globalElementBalanceSum[lastProcToRecv].lowerPart - globalElementBalanceSum[firstProcToRecv].lowerPart :
                                                        globalElementBalanceSum[lastProcToRecv].greaterPart - globalElementBalanceSum[firstProcToRecv].greaterPart);

        const int nbProcsToRecv = (lastProcToRecv-firstProcToRecv);
        const int nbProcsToSend = (lastProcToSend-firstProcToSend);
        const IndexType totalElements = (totalElementToProceed+totalElementAlreadyOwned);

        std::vector<IndexType> nbElementsToRecvPerProc;
        nbElementsToRecvPerProc.resize(nbProcsToRecv);
        {
            // Get the number of elements each proc should recv
            IndexType totalRemainingElements = totalElements;
82 83
            IndexType totalAvailableElements = totalElementToProceed;

84 85 86 87 88

            for(int idxProc = firstProcToRecv; idxProc < lastProcToRecv ; ++idxProc){
                const IndexType nbElementsAlreadyOwned = (inFromRightToLeft ? globalElementBalance[idxProc].lowerPart : globalElementBalance[idxProc].greaterPart);
                const IndexType averageNbElementForRemainingProc = (totalRemainingElements)/(lastProcToRecv-idxProc);
                totalRemainingElements -= nbElementsAlreadyOwned;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
89
                FAssertLF(totalRemainingElements >= 0);
90 91 92 93
                if(nbElementsAlreadyOwned < averageNbElementForRemainingProc && totalAvailableElements){
                    nbElementsToRecvPerProc[idxProc - firstProcToRecv] = FMath::Min(totalAvailableElements,
                                                                                    averageNbElementForRemainingProc - nbElementsAlreadyOwned);
                    totalAvailableElements -= nbElementsToRecvPerProc[idxProc - firstProcToRecv];
94 95 96 97 98
                    totalRemainingElements -= nbElementsToRecvPerProc[idxProc - firstProcToRecv];
                }
                else{
                    nbElementsToRecvPerProc[idxProc - firstProcToRecv] = 0;
                }
BRAMAS Berenger's avatar
BRAMAS Berenger committed
99
                FLOG(if(VerboseLog)  FLog::Controller << "SCALFMM-DEBUG ["  << currentRank << "] nbElementsToRecvPerProc[" << idxProc << "] = " << nbElementsToRecvPerProc[idxProc - firstProcToRecv] << "\n"; )
100
            }
BRAMAS Berenger's avatar
BRAMAS Berenger committed
101
            FAssertLF(totalRemainingElements == 0);
102
        }
103

104 105 106 107 108 109
        // Store in an array the number of element to send
        std::vector<IndexType> nbElementsToSendPerProc;
        nbElementsToSendPerProc.resize(nbProcsToSend);
        for(int idxProc = firstProcToSend; idxProc < lastProcToSend ; ++idxProc){
            const IndexType nbElementsAlreadyOwned = (inFromRightToLeft ? globalElementBalance[idxProc].lowerPart : globalElementBalance[idxProc].greaterPart);
            nbElementsToSendPerProc[idxProc-firstProcToSend] = nbElementsAlreadyOwned;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
110
            FLOG(if(VerboseLog)  FLog::Controller << "SCALFMM-DEBUG ["  << currentRank << "] nbElementsToSendPerProc[" << idxProc << "] = " << nbElementsToSendPerProc[idxProc-firstProcToSend] << "\n"; )
111
        }
112

113 114 115 116 117 118 119 120 121 122
        // Compute all the send recv but keep only the ones related to currentRank
        std::vector<PackData> packs;
        int idxProcSend = 0;
        IndexType positionElementsSend = 0;
        int idxProcRecv = 0;
        IndexType positionElementsRecv = 0;
        while(idxProcSend != nbProcsToSend && idxProcRecv != nbProcsToRecv){
            if(nbElementsToSendPerProc[idxProcSend] == 0){
                idxProcSend += 1;
                positionElementsSend = 0;
123
            }
124 125 126
            else if(nbElementsToRecvPerProc[idxProcRecv] == 0){
                idxProcRecv += 1;
                positionElementsRecv = 0;
127
            }
128 129 130 131 132 133 134 135
            else {
                const IndexType nbElementsInPack = FMath::Min(nbElementsToSendPerProc[idxProcSend], nbElementsToRecvPerProc[idxProcRecv]);
                if(idxProcSend + firstProcToSend == currentRank){
                    PackData pack;
                    pack.idProc      = idxProcRecv + firstProcToRecv;
                    pack.fromElement = positionElementsSend;
                    pack.toElement   = pack.fromElement + nbElementsInPack;
                    packs.push_back(pack);
136
                }
137 138 139 140 141 142
                else if(idxProcRecv + firstProcToRecv == currentRank){
                    PackData pack;
                    pack.idProc      = idxProcSend + firstProcToSend;
                    pack.fromElement = positionElementsRecv;
                    pack.toElement   = pack.fromElement + nbElementsInPack;
                    packs.push_back(pack);
143
                }
144 145 146 147
                nbElementsToSendPerProc[idxProcSend] -= nbElementsInPack;
                nbElementsToRecvPerProc[idxProcRecv] -= nbElementsInPack;
                positionElementsSend += nbElementsInPack;
                positionElementsRecv += nbElementsInPack;
148
            }
149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169
        }

        return packs;
    }

    static void RecvDistribution(SortType ** inPartRecv, IndexType* inNbElementsRecv,
                                 const Partition globalElementBalance[], const Partition globalElementBalanceSum[],
                                 const int procInTheMiddle, const FMpi::FComm& currentComm, const bool inFromRightToLeft){
        // Compute to know what to recv
        const std::vector<PackData> whatToRecvFromWho = Distribute(currentComm.processId(), currentComm.processCount(),
                                                                   globalElementBalance, globalElementBalanceSum,
                                                                   procInTheMiddle, inFromRightToLeft);
        // Count the total number of elements to recv
        IndexType totalToRecv = 0;
        for(const PackData& pack : whatToRecvFromWho){
            totalToRecv += pack.toElement - pack.fromElement;
        }
        // Alloc buffer
        SortType* recvBuffer = new SortType[totalToRecv];

        // Recv all data
170 171
        std::vector<MPI_Request> requests;
        requests.reserve(whatToRecvFromWho.size());
172 173
        for(int idxPack = 0 ; idxPack < int(whatToRecvFromWho.size()) ; ++idxPack){
            const PackData& pack = whatToRecvFromWho[idxPack];
BRAMAS Berenger's avatar
BRAMAS Berenger committed
174
            FLOG(if(VerboseLog)  FLog::Controller << "SCALFMM-DEBUG ["  << currentComm.processId() << "] Recv from " << pack.idProc << " from " << pack.fromElement << " to " << pack.toElement << "\n"; );
175 176
            FAssertLF(pack.toElement <= totalToRecv);
            FMpi::IRecvSplit(&recvBuffer[pack.fromElement],
BRAMAS Berenger's avatar
BRAMAS Berenger committed
177 178 179 180 181
                    (pack.toElement - pack.fromElement),
                    pack.idProc,
                    FMpi::TagQuickSort,
                    currentComm,
                    &requests);
182

183
        }
184
        FAssertLF(whatToRecvFromWho.size() <= requests.size());
BRAMAS Berenger's avatar
BRAMAS Berenger committed
185 186
        FLOG(if(VerboseLog)  FLog::Controller << "SCALFMM-DEBUG ["  << "Wait for " << requests.size() << " request \n" );
        FLOG(if(VerboseLog)  FLog::Controller.flush());
187
        // Wait to complete
BRAMAS Berenger's avatar
BRAMAS Berenger committed
188
        FMpi::Assert( MPI_Waitall(int(requests.size()), requests.data(), MPI_STATUSES_IGNORE),  __LINE__ );
BRAMAS Berenger's avatar
BRAMAS Berenger committed
189 190
        FLOG(if(VerboseLog)  FLog::Controller << "SCALFMM-DEBUG ["  << currentComm.processId() << "] Recv Done \n"; )
                FLOG(if(VerboseLog)  FLog::Controller.flush());
191 192 193 194
        // Copy to ouput variables
        (*inPartRecv) = recvBuffer;
        (*inNbElementsRecv) = totalToRecv;
    }
195

196
    static void SendDistribution(const SortType * inPartToSend, const IndexType /*inNbElementsToSend*/,
197 198 199 200 201 202 203 204
                                 const Partition globalElementBalance[], const Partition globalElementBalanceSum[],
                                 const int procInTheMiddle, const FMpi::FComm& currentComm, const bool inFromRightToLeft){
        // Compute to know what to send
        const std::vector<PackData> whatToSendToWho = Distribute(currentComm.processId(), currentComm.processCount(),
                                                                 globalElementBalance, globalElementBalanceSum,
                                                                 procInTheMiddle, inFromRightToLeft);

        // Post send messages
205 206
        std::vector<MPI_Request> requests;
        requests.reserve(whatToSendToWho.size());
207 208
        for(int idxPack = 0 ; idxPack < int(whatToSendToWho.size()) ; ++idxPack){
            const PackData& pack = whatToSendToWho[idxPack];
BRAMAS Berenger's avatar
BRAMAS Berenger committed
209
            FLOG(if(VerboseLog)  FLog::Controller << "SCALFMM-DEBUG ["  << currentComm.processId() << "] Send to " << pack.idProc << " from " << pack.fromElement << " to " << pack.toElement << "\n"; );
210

211
            FMpi::ISendSplit(&inPartToSend[pack.fromElement],
BRAMAS Berenger's avatar
BRAMAS Berenger committed
212 213 214 215 216
                    (pack.toElement - pack.fromElement),
                    pack.idProc,
                    FMpi::TagQuickSort,
                    currentComm,
                    &requests);
217
        }
218
        FAssertLF(whatToSendToWho.size() <= requests.size());
BRAMAS Berenger's avatar
BRAMAS Berenger committed
219 220
        FLOG(if(VerboseLog)  FLog::Controller << "SCALFMM-DEBUG [" << currentComm.processId() << "] Wait for " << requests.size() << " request \n" );
        FLOG(if(VerboseLog)  FLog::Controller.flush());
221
        // Wait to complete
BRAMAS Berenger's avatar
BRAMAS Berenger committed
222
        FMpi::Assert( MPI_Waitall(int(requests.size()), requests.data(), MPI_STATUSES_IGNORE),  __LINE__ );
BRAMAS Berenger's avatar
BRAMAS Berenger committed
223 224
        FLOG(if(VerboseLog)  FLog::Controller << "SCALFMM-DEBUG ["  << currentComm.processId() << "] Send Done \n"; )
                FLOG(if(VerboseLog)  FLog::Controller.flush());
225
    }
226

227 228 229 230 231 232 233 234 235 236 237 238
    static CompareType SelectPivot(const SortType workingArray[], const IndexType currentSize, const FMpi::FComm& currentComm, bool* shouldStop){
        enum ValuesState{
            ALL_THE_SAME,
            NO_VALUES,
            AVERAGE_2
        };
        // Check if all the same
        bool allTheSame = true;
        // Check if empty
        const bool noValues = (currentSize == 0);
        // Get the local pivot if not empty
        CompareType localPivot = CompareType(0);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254

        if(noValues == false){
            // We need to know the max value to ensure that the pivot will be different
            CompareType maxFoundValue = CompareType(workingArray[0]);
            // We need to know the min value to ensure that the pivot will be different
            CompareType minFoundValue = CompareType(workingArray[0]);

            for(int idx = 1 ; idx < currentSize ; ++idx){
                // Keep the max
                maxFoundValue = FMath::Max(maxFoundValue , CompareType(workingArray[idx]));
                // Keep the min
                minFoundValue = FMath::Min(minFoundValue , CompareType(workingArray[idx]));
            }
            allTheSame = (maxFoundValue == minFoundValue);
            // Value equal to pivot are kept on the left so
            localPivot = ((maxFoundValue-minFoundValue)/2) + minFoundValue;
255
            // The pivot must be different (to ensure that the partition will return two parts)
256
            if( localPivot == maxFoundValue && !allTheSame){
BRAMAS Berenger's avatar
BRAMAS Berenger committed
257 258
                FLOG(if(VerboseLog)  FLog::Controller << "SCALFMM-DEBUG ["  << currentComm.processId() << "] Pivot " << localPivot << " is equal max and allTheSame equal " << allTheSame << "\n"; )
                        FLOG(if(VerboseLog)  FLog::Controller.flush());
259 260
                localPivot -= 1;
            }
261
        }
262

BRAMAS Berenger's avatar
BRAMAS Berenger committed
263 264
        FLOG(if(VerboseLog)  FLog::Controller << "SCALFMM-DEBUG ["  << currentComm.processId() << "] localPivot = " << localPivot << "\n" );
        FLOG(if(VerboseLog)  FLog::Controller.flush());
265

266
        //const int myRank = currentComm.processId();
267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284
        const int nbProcs = currentComm.processCount();
        // Exchange the pivos and the state
        std::unique_ptr<int[]> allProcStates(new int[nbProcs]);
        std::unique_ptr<CompareType[]> allProcPivots(new CompareType[nbProcs]);
        {
            int myState = (noValues?NO_VALUES:(allTheSame?ALL_THE_SAME:AVERAGE_2));
            FMpi::Assert( MPI_Allgather( &myState, 1, MPI_INT, allProcStates.get(),
                                         1, MPI_INT, currentComm.getComm()),  __LINE__ );
            FMpi::Assert( MPI_Allgather( &localPivot, sizeof(CompareType), MPI_BYTE, allProcPivots.get(),
                                         sizeof(CompareType), MPI_BYTE, currentComm.getComm()),  __LINE__ );
        }
        // Test if all the procs have ALL_THE_SAME and the same value
        bool allProcsAreSame = true;
        for(int idxProc = 0 ; idxProc < nbProcs && allProcsAreSame; ++idxProc){
            if(allProcStates[idxProc] != NO_VALUES && (allProcStates[idxProc] != ALL_THE_SAME || allProcPivots[0] != allProcPivots[idxProc])){
                allProcsAreSame = false;
            }
        }
285

286 287 288 289 290 291 292 293 294 295 296 297 298
        if(allProcsAreSame){
            // All the procs are the same so we need to stop
            (*shouldStop) = true;
            return CompareType(0);
        }
        else{
            CompareType globalPivot = 0;
            CompareType counterValuesInPivot = 0;
            // Compute the pivos
            for(int idxProc = 0 ; idxProc < nbProcs; ++idxProc){
                if(allProcStates[idxProc] != NO_VALUES){
                    globalPivot += allProcPivots[idxProc];
                    counterValuesInPivot += 1;
299
                }
300
            }
301 302 303 304
            if(counterValuesInPivot <= 1){
                (*shouldStop) = true;
                return globalPivot;
            }
305 306 307 308
            (*shouldStop) = false;
            return globalPivot/counterValuesInPivot;
        }
    }
309

310
public:
311

312 313 314 315 316 317
    static void QsMpi(const SortType originalArray[], const IndexType originalSize,
                      SortType** outputArray, IndexType* outputSize, const FMpi::FComm& originalComm){
        // We do not work in place, so create a new array
        IndexType currentSize = originalSize;
        SortType* workingArray = new SortType[currentSize];
        FMemUtils::memcpy(workingArray, originalArray, sizeof(SortType) * currentSize);
318

319 320
        // Clone the MPI group because we will reduce it after each partition
        FMpi::FComm currentComm(originalComm.getComm());
321

322 323 324 325 326 327
        // Parallel sharing until I am alone on the data
        while( currentComm.processCount() != 1 ){
            // Agree on the Pivot
            bool shouldStop;
            const CompareType globalPivot = SelectPivot(workingArray, currentSize, currentComm, &shouldStop);
            if(shouldStop){
BRAMAS Berenger's avatar
BRAMAS Berenger committed
328 329
                FLOG(if(VerboseLog)  FLog::Controller << "SCALFMM-DEBUG ["  << currentComm.processId() << "] shouldStop = " << shouldStop << "\n" );
                FLOG(if(VerboseLog)  FLog::Controller.flush());
330
                break;
331 332
            }

333
            FLOG(if(VerboseLog)  FLog::Controller << "SCALFMM-DEBUG ["  << currentComm.processId() << "] globalPivot = " << globalPivot << " for " << currentComm.processCount() << "\n" );
BRAMAS Berenger's avatar
BRAMAS Berenger committed
334
            FLOG(if(VerboseLog)  FLog::Controller.flush());
335

336 337 338
            // Split the array in two parts lower equal to pivot and greater than pivot
            const IndexType nbLowerElements = QsPartition(workingArray, 0, currentSize-1, globalPivot);
            const IndexType nbGreaterElements = currentSize - nbLowerElements;
339

BRAMAS Berenger's avatar
BRAMAS Berenger committed
340 341
            FLOG(if(VerboseLog)  FLog::Controller << "SCALFMM-DEBUG ["  << currentComm.processId() << "] After Partition: lower = " << nbLowerElements << " greater = " << nbGreaterElements << "\n"; )
                    FLOG(if(VerboseLog)  FLog::Controller.flush());
342

343 344
            const int currentRank = currentComm.processId();
            const int currentNbProcs = currentComm.processCount();
345

346 347 348
            // We need to know what each process is holding
            Partition currentElementsBalance = { nbLowerElements, nbGreaterElements};
            Partition globalElementBalance[currentNbProcs];
349

350 351 352
            // Every one in the group need to know
            FMpi::Assert( MPI_Allgather( &currentElementsBalance, sizeof(Partition), MPI_BYTE, globalElementBalance,
                                         sizeof(Partition), MPI_BYTE, currentComm.getComm()),  __LINE__ );
353

354 355 356 357 358 359 360 361 362 363 364
            // Find the number of elements lower or greater
            IndexType globalNumberOfElementsGreater = 0;
            IndexType globalNumberOfElementsLower = 0;
            Partition globalElementBalanceSum[currentNbProcs + 1];
            globalElementBalanceSum[0].lowerPart = 0;
            globalElementBalanceSum[0].greaterPart = 0;
            for(int idxProc = 0 ; idxProc < currentNbProcs ; ++idxProc){
                globalElementBalanceSum[idxProc + 1].lowerPart = globalElementBalanceSum[idxProc].lowerPart + globalElementBalance[idxProc].lowerPart;
                globalElementBalanceSum[idxProc + 1].greaterPart = globalElementBalanceSum[idxProc].greaterPart + globalElementBalance[idxProc].greaterPart;
                globalNumberOfElementsGreater += globalElementBalance[idxProc].greaterPart;
                globalNumberOfElementsLower   += globalElementBalance[idxProc].lowerPart;
365 366
            }

BRAMAS Berenger's avatar
BRAMAS Berenger committed
367 368 369
            FLOG(if(VerboseLog)  FLog::Controller << "SCALFMM-DEBUG ["  << currentComm.processId() << "] globalNumberOfElementsGreater = " << globalNumberOfElementsGreater << "\n"; )
                    FLOG(if(VerboseLog)  FLog::Controller << "SCALFMM-DEBUG ["  << currentComm.processId() << "] globalNumberOfElementsLower   = " << globalNumberOfElementsLower << "\n"; )
                    FLOG(if(VerboseLog)  FLog::Controller.flush());
370 371 372 373 374

            // The proc rank in the middle from the percentage
            int procInTheMiddle;
            if(globalNumberOfElementsLower == 0)        procInTheMiddle = -1;
            else if(globalNumberOfElementsGreater == 0) procInTheMiddle = currentNbProcs-1;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
375
            else procInTheMiddle = int(FMath::Min(IndexType(currentNbProcs-2), (currentNbProcs*globalNumberOfElementsLower)
BRAMAS Berenger's avatar
BRAMAS Berenger committed
376
                                                  /(globalNumberOfElementsGreater + globalNumberOfElementsLower)));
377

BRAMAS Berenger's avatar
BRAMAS Berenger committed
378 379
            FLOG(if(VerboseLog)  FLog::Controller << "SCALFMM-DEBUG ["  << currentComm.processId() << "] procInTheMiddle = " << procInTheMiddle << "\n"; )
                    FLOG(if(VerboseLog)  FLog::Controller.flush());
380 381 382 383 384 385 386 387 388 389 390 391 392

            // Send or receive depending on the state
            if(currentRank <= procInTheMiddle){
                // I am in the group of the lower elements
                SendDistribution(workingArray + nbLowerElements, nbGreaterElements,
                                 globalElementBalance, globalElementBalanceSum, procInTheMiddle, currentComm, false);
                SortType* lowerPartRecv = nullptr;
                IndexType nbLowerElementsRecv = 0;
                RecvDistribution(&lowerPartRecv, &nbLowerElementsRecv,
                                 globalElementBalance, globalElementBalanceSum, procInTheMiddle, currentComm, true);
                // Merge previous part and just received elements
                const IndexType fullNbLowerElementsRecv = nbLowerElementsRecv + nbLowerElements;
                SortType* fullLowerPart = new SortType[fullNbLowerElementsRecv];
393 394
                std::memcpy(reinterpret_cast<char*>(fullLowerPart), workingArray, sizeof(SortType)* nbLowerElements);
                std::memcpy(reinterpret_cast<char*>(fullLowerPart + nbLowerElements), lowerPartRecv, sizeof(SortType)* nbLowerElementsRecv);
395 396 397 398 399
                delete[] workingArray;
                delete[] lowerPartRecv;
                workingArray = fullLowerPart;
                currentSize = fullNbLowerElementsRecv;
                // Reduce working group
BRAMAS Berenger's avatar
BRAMAS Berenger committed
400 401
                FLOG(if(VerboseLog)  FLog::Controller << "SCALFMM-DEBUG ["  << currentComm.processId() << "] Reduce group to " << 0 << " / " << procInTheMiddle << "\n"; )
                        FLOG(if(VerboseLog)  FLog::Controller.flush());
402
                currentComm.groupReduce( 0, procInTheMiddle);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
403 404
                FLOG(if(VerboseLog)  FLog::Controller << "SCALFMM-DEBUG ["  << currentComm.processId() << "] Done\n" );
                FLOG(if(VerboseLog)  FLog::Controller.flush());
405 406 407 408 409 410 411 412 413 414 415 416
            }
            else {
                // I am in the group of the greater elements
                SortType* greaterPartRecv = nullptr;
                IndexType nbGreaterElementsRecv = 0;
                RecvDistribution(&greaterPartRecv, &nbGreaterElementsRecv,
                                 globalElementBalance, globalElementBalanceSum, procInTheMiddle, currentComm, false);
                SendDistribution(workingArray, nbLowerElements,
                                 globalElementBalance, globalElementBalanceSum, procInTheMiddle, currentComm, true);
                // Merge previous part and just received elements
                const IndexType fullNbGreaterElementsRecv = nbGreaterElementsRecv + nbGreaterElements;
                SortType* fullGreaterPart = new SortType[fullNbGreaterElementsRecv];
417 418
                std::memcpy(reinterpret_cast<char*>(fullGreaterPart), workingArray + nbLowerElements, sizeof(SortType)* nbGreaterElements);
                std::memcpy(reinterpret_cast<char*>(fullGreaterPart + nbGreaterElements), greaterPartRecv, sizeof(SortType)* nbGreaterElementsRecv);
419 420 421 422 423
                delete[] workingArray;
                delete[] greaterPartRecv;
                workingArray = fullGreaterPart;
                currentSize = fullNbGreaterElementsRecv;
                // Reduce working group
BRAMAS Berenger's avatar
BRAMAS Berenger committed
424 425
                FLOG( if(VerboseLog) FLog::Controller << "SCALFMM-DEBUG ["  << currentComm.processId() << "] Reduce group to " << procInTheMiddle + 1 << " / " << currentNbProcs - 1 << "\n"; )
                        FLOG( if(VerboseLog) FLog::Controller.flush());
426
                currentComm.groupReduce( procInTheMiddle + 1, currentNbProcs - 1);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
427 428
                FLOG( if(VerboseLog) FLog::Controller << "SCALFMM-DEBUG ["  << currentComm.processId() << "] Done\n"; )
                        FLOG( if(VerboseLog) FLog::Controller.flush());
429
            }
430 431
        }

BRAMAS Berenger's avatar
BRAMAS Berenger committed
432 433
        FLOG( if(VerboseLog) FLog::Controller << "SCALFMM-DEBUG ["  << currentComm.processId() << "] Sequential sort (currentSize = " << currentSize << ")\n"; )
        FLOG( if(VerboseLog) FLog::Controller.flush());
434
        // Finish by a local sort
435
        FQuickSort< SortType, IndexType>::QsOmp(workingArray, currentSize, [](const SortType& v1, const SortType& v2){
436 437
            return CompareType(v1) <= CompareType(v2);
        });
438 439
        (*outputSize)  = currentSize;
        (*outputArray) = workingArray;
440 441 442
    }
};

BRAMAS Berenger's avatar
BRAMAS Berenger committed
443 444 445 446 447 448

#ifdef SCALFMM_USE_LOG
template <class SortType, class CompareType, class IndexType>
const bool FQuickSortMpi<SortType, CompareType, IndexType>::VerboseLog = FEnv::GetBool("SCALFMM_DEBUG_LOG", false);
#endif

449
#endif // FQUICKSORTMPI_HPP