// ===================================================================================
// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Bérenger Bramas, Matthias Messner
// olivier.coulaud@inria.fr, berenger.bramas@inria.fr
// This software is a computer program whose purpose is to compute the FMM.
//
// This software is governed by the CeCILL-C and LGPL licenses and
// abiding by the rules of distribution of free software.  
// 
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public and CeCILL-C Licenses for more details.
// "http://www.cecill.info". 
// "http://www.gnu.org/licenses".
// ===================================================================================
#ifndef FQUICKSORT_HPP
#define FQUICKSORT_HPP

#include <omp.h>
#include <cmath>
#include <ctime>
#include <cstdlib>
#include <cstring>
#include <vector> // For the parallel version without tasks
#include <utility> // For move

#include "FGlobal.hpp"
#include "FMemUtils.hpp"


/** This class provides a parallel quick sort.
  * It holds an MPI version,
  * two OpenMP versions (one based on tasks, the other MPI-like),
  * and a sequential version.
  *
  * The task-based algorithm is easy to understand;
  * for the MPI/second OpenMP version please see
  * Introduction to Parallel Computing (Grama, Gupta, Karypis, Kumar).
  */
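//
// Usage sketch (not part of the original file, shown for illustration only):
// CompareType must be constructible from SortType and comparable with operator<=,
// e.g. plain doubles sorted by value with int indices:
//
//     double values[] = {3.0, 1.0, 2.0};
//     FQuickSort<double, double, int>::QsSequential(values, 3);
//     FQuickSort<double, double, int>::QsOmp(values, 3);
//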

template <class SortType, class CompareType, class IndexType>
class FQuickSort {
protected:
    /** Swap two values */
    template <class NumType>
    static inline void Swap(NumType& value, NumType& other){
        NumType temp = std::move(value);
        value = std::move(other);
        other = std::move(temp);
    }

    ////////////////////////////////////////////////////////////
    // Quick sort
    ////////////////////////////////////////////////////////////

    /* Partition step used by the sequential and parallel quick sorts */
    static IndexType QsPartition(SortType array[], IndexType left, IndexType right){
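        // The pivot is the middle element of [left, right]: it is moved to the
        // right end, then a Lomuto-style partition is done around it. On return,
        // the pivot sits at the returned index and everything left of it is <= pivot.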
        Swap(array[right],array[((right - left ) / 2) + left]);

        IndexType idx = left;
        while( idx < right && CompareType(array[idx]) <= CompareType(array[right])){
            idx += 1;
        }
        left = idx;

        for( ; idx < right ; ++idx){
            if( CompareType(array[idx]) <= CompareType(array[right]) ){
                Swap(array[idx],array[left]);
                left += 1;
            }
        }

        Swap(array[left],array[right]);

        return left;
    }

    /* The recursive step of the sequential quick sort */
    static void QsSequentialStep(SortType array[], const IndexType left, const IndexType right){
        if(left < right){
            const IndexType part = QsPartition(array, left, right);
            QsSequentialStep(array,part + 1,right);
            QsSequentialStep(array,left,part - 1);
        }
    }

    /** Task-based recursion: spawn OpenMP tasks until the depth budget is exhausted, then fall back to the sequential sort */
    static void QsOmpTask(SortType array[], const IndexType left, const IndexType right, const int deep){
        if(left < right){
            const IndexType part = QsPartition(array, left, right);
            if( deep ){
                #pragma omp task
                QsOmpTask(array,part + 1,right, deep - 1);
                #pragma omp task
                QsOmpTask(array,left,part - 1, deep - 1);
            }
            else {
                QsSequentialStep(array,part + 1,right);
                QsSequentialStep(array,left,part - 1);
            }
        }
    }

public:
    /* The sequential quick sort */
    static void QsSequential(SortType array[], const IndexType size){
        QsSequentialStep(array,0,size-1);
    }

#if _OPENMP < 200805 || defined(__ICC) || defined(__INTEL_COMPILER)
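    // OpenMP tasks require OpenMP 3.0 (_OPENMP >= 200805): older runtimes and
    // builds with the Intel compilers take this branch, which emulates tasks
    // with a shared vector of intervals protected by an OpenMP lock.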
    class TaskInterval{
        IndexType left;
        IndexType right;
        int deep;
    public:
        TaskInterval(const IndexType inLeft, const IndexType inRight, const int inDeep)
            : left(inLeft), right(inRight), deep(inDeep){
        }

        IndexType getLeft() const{
            return left;
        }
        IndexType getRight() const{
            return right;
        }
        int getDeep() const{
            return deep;
        }
    };

    static void QsOmp(SortType elements[], const int nbElements){
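        // Task emulation: the intervals to sort are kept in a shared vector used
        // as a stack and protected by an OpenMP lock. Threads pop an interval,
        // split it while its depth budget lasts (so that roughly 2^deep >= 5 * nb
        // threads intervals exist), and sort exhausted intervals sequentially.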
        const int nbTasksRequiere = (omp_get_max_threads() * 5);
        int deep = 0;
        while( (1 << deep) < nbTasksRequiere ) deep += 1;

        std::vector<TaskInterval> tasks;
        tasks.push_back(TaskInterval(0, nbElements-1, deep));
        int numberOfThreadProceeding = 0;
        omp_lock_t mutexShareVariable;
        omp_init_lock(&mutexShareVariable);

        #pragma omp parallel
        {
            bool hasWorkToDo = true;
            while(hasWorkToDo){
                // Ask for the mutex
                omp_set_lock(&mutexShareVariable);
                if(tasks.size()){
                    // There are tasks to process
                    const TaskInterval ts(tasks.back());
                    tasks.pop_back();

                    // Should this task create new tasks?
                    if(ts.getDeep() == 0){
                        // No: release the mutex and sort this interval sequentially
                        omp_unset_lock(&mutexShareVariable);
                        QsSequentialStep(elements , ts.getLeft(), ts.getRight());
                    }
                    else{
                        // Yes: tell the others we are producing work, then release the mutex
                        numberOfThreadProceeding += 1;
                        omp_unset_lock(&mutexShareVariable);

                        // Partition
                        const IndexType part = QsPartition(elements, ts.getLeft(), ts.getRight());

                        // Push the two new tasks into the vector
                        omp_set_lock(&mutexShareVariable);
                        tasks.push_back(TaskInterval(part+1, ts.getRight(), ts.getDeep()-1));
                        tasks.push_back(TaskInterval(ts.getLeft(), part-1, ts.getDeep()-1));
                        // The new tasks are queued and we are no longer working, so inform the others
                        numberOfThreadProceeding -= 1;
                        omp_unset_lock(&mutexShareVariable);
                    }
                }
                else{
                    // There is no task left in the vector
                    #pragma omp flush(numberOfThreadProceeding)
                    if(numberOfThreadProceeding == 0){
                        // And no running thread can create new tasks, so stop here
                        hasWorkToDo = false;
                    }
                    // Release mutex
                    omp_unset_lock(&mutexShareVariable);
                }
            }
        }

        omp_destroy_lock(&mutexShareVariable);
    }
#else
    /** The OpenMP quick sort (task based) */
    static void QsOmp(SortType array[], const IndexType size){
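        // Spawn tasks while splitting, deep enough that at least
        // 5 * omp_get_max_threads() leaf intervals are created; each leaf
        // interval is then sorted with the sequential quick sort.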
        const int nbTasksRequiere = (omp_get_max_threads() * 5);
        int deep = 0;
        while( (1 << deep) < nbTasksRequiere ) deep += 1;

        #pragma omp parallel
        {
            #pragma omp single nowait
            {
                QsOmpTask(array, 0, size - 1 , deep);
            }
        }
    }
#endif
};

#endif // FQUICKSORT_HPP