FBitonicSort.hpp 7.62 KB
Newer Older
1
// ===================================================================================
2 3 4 5 6 7 8 9 10 11 12 13 14
// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Bérenger Bramas, Matthias Messner
// olivier.coulaud@inria.fr, berenger.bramas@inria.fr
// This software is a computer program whose purpose is to compute the FMM.
//
// This software is governed by the CeCILL-C and LGPL licenses and
// abiding by the rules of distribution of free software.  
// 
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public and CeCILL-C Licenses for more details.
// "http://www.cecill.info". 
// "http://www.gnu.org/licenses".
15
// ===================================================================================
berenger-bramas's avatar
berenger-bramas committed
16 17 18 19 20 21
#ifndef BITONICSORT_HPP
#define BITONICSORT_HPP

#include <cstdlib>
#include <cmath>

22

berenger-bramas's avatar
berenger-bramas committed
23
#include "FMpi.hpp"
24
#include "FQuickSort.hpp"
25
#include "FAssert.hpp"
26 27 28 29 30 31 32

/** This class is a parallel bitonic sort
  * it is based on the paper :
  * Library Support for Parallel Sorting in Scientific Computations
  * Holger Dachsel1 , Michael Hofmann2, , and Gudula R ̈nger2
  */
template <class SortType, class CompareType, class IndexType>
berenger-bramas's avatar
berenger-bramas committed
33
class FBitonicSort {
berenger-bramas's avatar
berenger-bramas committed
34 35 36 37 38 39 40 41
private:

    ////////////////////////////////////////////////////////////////
    // Bitonic parallel sort !
    ////////////////////////////////////////////////////////////////

    // This function exchange data with the other rank,
    // its send the max value and receive min value
42
    static void SendMaxAndGetMin(SortType array[], const IndexType size, const int otherRank, const FMpi::FComm& comm){
43 44 45
        IndexType left  = -1;
        IndexType right = size - 1;
        IndexType pivot = left + (right - left + 1)/2;
berenger-bramas's avatar
berenger-bramas committed
46 47
        CompareType otherValue = -1;
        CompareType tempCompareValue = CompareType(array[pivot]);
48 49
        MPI_Sendrecv(&tempCompareValue,sizeof(CompareType),MPI_BYTE,otherRank,FMpi::TagBitonicMin,&otherValue,sizeof(CompareType),
                     MPI_BYTE,otherRank,FMpi::TagBitonicMax,comm.getComm(),MPI_STATUS_IGNORE);
berenger-bramas's avatar
berenger-bramas committed
50 51 52 53 54 55 56 57 58 59 60 61

        while( pivot != left && pivot != right  && array[pivot] != otherValue) {

            if( array[pivot] < otherValue ){
                left = pivot;
            }
            else {
                right = pivot;
            }
            pivot = left + (right - left + 1)/2;
            tempCompareValue = CompareType(array[pivot]);

62 63
            MPI_Sendrecv(&tempCompareValue,sizeof(CompareType),MPI_BYTE,otherRank,FMpi::TagBitonicMin,
                         &otherValue,sizeof(CompareType),MPI_BYTE,otherRank,FMpi::TagBitonicMax,comm.getComm(),MPI_STATUS_IGNORE);
berenger-bramas's avatar
berenger-bramas committed
64 65 66 67
        }

        if( otherValue <= array[pivot] ){
            MPI_Sendrecv_replace(&array[pivot], int((size - pivot) * sizeof(SortType)) , MPI_BYTE,
68
                                   otherRank, FMpi::TagBitonicMinMess, otherRank, FMpi::TagBitonicMaxMess,
69
                                   comm.getComm(), MPI_STATUS_IGNORE);
berenger-bramas's avatar
berenger-bramas committed
70 71 72 73 74

        }
        else if( array[pivot] < otherValue){
            if(pivot != size - 1){
                MPI_Sendrecv_replace(&array[pivot + 1], int((size - pivot - 1) * sizeof(SortType)) , MPI_BYTE,
75
                                       otherRank, FMpi::TagBitonicMinMess, otherRank, FMpi::TagBitonicMaxMess,
76
                                       comm.getComm(), MPI_STATUS_IGNORE);
berenger-bramas's avatar
berenger-bramas committed
77 78 79 80 81 82 83
            }
        }

    }

    // This function exchange data with the other rank,
    // its send the min value and receive max value
84
    static void SendMinAndGetMax(SortType array[], const IndexType size, const int otherRank, const FMpi::FComm& comm){
85 86 87
        IndexType left  = 0;
        IndexType right = size ;
        IndexType pivot = left + (right - left)/2;
berenger-bramas's avatar
berenger-bramas committed
88 89
        CompareType otherValue = -1;
        CompareType tempCompareValue = CompareType(array[pivot]);
90 91
        MPI_Sendrecv(&tempCompareValue,sizeof(CompareType),MPI_BYTE,otherRank,FMpi::TagBitonicMax,
                     &otherValue,sizeof(CompareType),MPI_BYTE,otherRank,FMpi::TagBitonicMin,comm.getComm(),MPI_STATUS_IGNORE);
berenger-bramas's avatar
berenger-bramas committed
92 93 94 95 96 97 98 99 100 101 102

        while(  pivot != left  && array[pivot] != otherValue) {

            if( array[pivot] < otherValue ){
                left = pivot;
            }
            else {
                right = pivot;
            }
            pivot = left + (right - left)/2;
            tempCompareValue = CompareType(array[pivot]);
103 104
            MPI_Sendrecv(&tempCompareValue,sizeof(CompareType),MPI_BYTE,otherRank,FMpi::TagBitonicMax,
                         &otherValue,sizeof(CompareType),MPI_BYTE,otherRank,FMpi::TagBitonicMin,comm.getComm(),MPI_STATUS_IGNORE);
berenger-bramas's avatar
berenger-bramas committed
105 106 107 108 109
        }


        if( array[pivot] <= otherValue ){
            MPI_Sendrecv_replace(&array[0], int((pivot + 1) * sizeof(SortType)) , MPI_BYTE,
110
                                   otherRank, FMpi::TagBitonicMaxMess, otherRank, FMpi::TagBitonicMinMess,
111
                                   comm.getComm(), MPI_STATUS_IGNORE);
berenger-bramas's avatar
berenger-bramas committed
112 113 114 115
        }
        else if( otherValue < array[pivot]){
            if(pivot != 0){
                MPI_Sendrecv_replace(&array[0], int((pivot) * sizeof(SortType)) , MPI_BYTE,
116
                                       otherRank, FMpi::TagBitonicMaxMess, otherRank, FMpi::TagBitonicMinMess,
117
                                       comm.getComm(), MPI_STATUS_IGNORE);
berenger-bramas's avatar
berenger-bramas committed
118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145
            }
        }
    }


public:

    /*
    From :
    http://web.mst.edu/~ercal/387/P3/pr-proj-3.pdf

    Parallel Bitonic Sort Algorithm for processor Pk (for k := 0 . . . P − 1)
    d:= log P
    // cube dimension
    sort(local − datak ) // sequential sort
    // Bitonic Sort follows
    for i:=1 to d do
        window-id = Most Significant (d-i) bits of Pk
        for j:=(i-1) down to 0 do
            if((window-id is even AND j th bit of Pk = 0)
            OR (window-id is odd AND j th bit of Pk = 1))
                then call CompareLow(j)
            else
                call CompareHigh(j)
            endif
        endfor
    endfor
      */
146 147 148
    static void Sort(SortType array[], const IndexType size, const FMpi::FComm& comm){
        const int np = comm.processCount();
        const int rank = comm.processId();
berenger-bramas's avatar
berenger-bramas committed
149

150 151 152 153 154 155
        {// Work only for power of 2!
            int flag = 1;
            while(!(np & flag)) flag <<= 1;
            FAssert(np == flag, "Bitonic sort work only with power of 2 for now.")
        }

156 157 158
        FQuickSort<SortType,IndexType>::QsOmp(array, size, [&](const SortType& v1, const SortType& v2){
            return CompareType(v1) <= CompareType(v2);
        });
berenger-bramas's avatar
berenger-bramas committed
159 160 161 162 163 164 165 166 167 168 169 170 171 172

        const int logNp = int(log2(np));
        for(int bitIdx = 1 ; bitIdx <= logNp ; ++bitIdx){
            // window-id = Most Significant (d-i) bits of Pk
            const int diBit =  (rank >> bitIdx) & 0x1;

            for(int otherBit = bitIdx - 1 ; otherBit >= 0 ; --otherBit){
                // if((window-id is even AND j th bit of Pk = 0)
                // OR (window-id is odd AND j th bit of Pk = 1))

                const int myOtherBit = (rank >> otherBit) & 0x1;
                const int otherRank = rank ^ (1 << otherBit);

                if( diBit != myOtherBit ){
173
                    SendMinAndGetMax(array, size, otherRank, comm);
berenger-bramas's avatar
berenger-bramas committed
174 175
                }
                else{
176
                    SendMaxAndGetMin(array, size, otherRank, comm);
berenger-bramas's avatar
berenger-bramas committed
177 178 179
                }
                // A merge sort is possible since the array is composed
                // by two part already sorted, but we want to do this in space
180 181 182
                FQuickSort<SortType,IndexType>::QsOmp(array, size, [&](const SortType& v1, const SortType& v2){
                    return CompareType(v1) <= CompareType(v2);
                });
berenger-bramas's avatar
berenger-bramas committed
183 184 185 186 187 188
            }
        }
    }
};

#endif // BITONICSORT_HPP