FQuickSort.hpp 8.28 KB
Newer Older
1
// ===================================================================================
2
3
4
5
6
7
8
9
10
11
12
13
14
// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Bérenger Bramas, Matthias Messner
// olivier.coulaud@inria.fr, berenger.bramas@inria.fr
// This software is a computer program whose purpose is to compute the FMM.
//
// This software is governed by the CeCILL-C and LGPL licenses and
// abiding by the rules of distribution of free software.  
// 
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public and CeCILL-C Licenses for more details.
// "http://www.cecill.info". 
// "http://www.gnu.org/licenses".
15
// ===================================================================================
16
17
18
19
20
21
22
23
#ifndef FQUICKSORT_HPP
#define FQUICKSORT_HPP

#include <omp.h>
#include <cmath>
#include <ctime>
#include <cstdlib>
#include <cstring>
24
#include <vector> // For parallel without task
25
#include <utility> // For move
26
#include <functional> // To have std::function
27

28
29
#include "FGlobal.hpp"
#include "FMemUtils.hpp"
30

31

32
33
34
35
36
37
38
39
/** This class is parallel quick sort
  * It hold a mpi version
  * + 2 openmp versions (one on tasks and the other like mpi)
  * + a sequential version
  *
  * The task based algorithm is easy to undestand,
  * for the mpi/openmp2nd please see
  * Introduction to parallel computing (Grama Gupta Karypis Kumar)
40
41
42
43
44
  *
  * In the previous version Intel Compiler was not using the QS with task
  * defined(__ICC) || defined(__INTEL_COMPILER) defined the FQS_TASKS_ARE_DISABLED
  *
  * If needed one can define FQS_FORCE_NO_TASKS to ensure that the no task version is used.
45
46
  */

47
template <class SortType, class IndexType = size_t>
48
class FQuickSort {
49
protected:
50

51
#if _OPENMP < 200805 || defined(FQS_FORCE_NO_TASKS)
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#define FQS_TASKS_ARE_DISABLED
    class TaskInterval{
        IndexType left;
        IndexType right;
        int deep;
    public:
        TaskInterval(const IndexType inLeft, const IndexType inRight, const int inDeep)
            : left(inLeft), right(inRight), deep(inDeep){
        }

        IndexType getLeft() const{
            return left;
        }
        IndexType getRight() const{
            return right;
        }
        int getDeep() const{
            return deep;
        }
    };
#endif

74
75
76
    /** swap to value */
    template <class NumType>
    static inline void Swap(NumType& value, NumType& other){
77
78
79
        const NumType temp = std::move(value);
        value = std::move(other);
        other = std::move(temp);
80
81
    }

82
83
    typedef bool (*infOrEqualPtr)(const SortType&, const SortType&);

84
85
86
87
    ////////////////////////////////////////////////////////////
    // Quick sort
    ////////////////////////////////////////////////////////////

88
    /* Use in the sequential qs */
89
    static IndexType QsPartition(SortType array[], IndexType left, IndexType right, const infOrEqualPtr infOrEqual){
90
        Swap(array[right],array[((right - left ) / 2) + left]);
91

92
        IndexType idx = left;
93
        while( idx < right && infOrEqual(array[idx],array[right])){
94
            idx += 1;
95
        }
96
        left = idx;
97

98
        for( ; idx < right ; ++idx){
99
            if( infOrEqual(array[idx],array[right]) ){
100
101
                Swap(array[idx],array[left]);
                left += 1;
102
103
104
            }
        }

105
106
107
        Swap(array[left],array[right]);

        return left;
108
109
    }

PIACIBELLO Cyrille's avatar
PIACIBELLO Cyrille committed
110

111
    /* The sequential qs */
112
    static void QsSequentialStep(SortType array[], const IndexType left, const IndexType right, const infOrEqualPtr infOrEqual){
113
        if(left < right){
114
115
116
            const IndexType part = QsPartition(array, left, right, infOrEqual);
            QsSequentialStep(array,part + 1,right, infOrEqual);
            QsSequentialStep(array,left,part - 1, infOrEqual);
117
118
119
120
        }
    }

    /** A task dispatcher */
121
    static void QsOmpTask(SortType array[], const IndexType left, const IndexType right, const int deep, const infOrEqualPtr infOrEqual){
122
        if(left < right){
123
            const IndexType part = QsPartition(array, left, right, infOrEqual);
124
            if( deep ){
125
                #pragma omp task default(none) firstprivate(array, part, right, deep, infOrEqual)
126
                QsOmpTask(array,part + 1,right, deep - 1, infOrEqual);
127
                // #pragma omp task default(none) firstprivate(array, part, right, deep, infOrEqual) // not needed
128
                QsOmpTask(array,left,part - 1, deep - 1, infOrEqual);
129
130
            }
            else {
131
132
                QsSequentialStep(array,part + 1,right, infOrEqual);
                QsSequentialStep(array,left,part - 1, infOrEqual);
133
            }
134
135
136
        }
    }

137
138
public:
    /* a sequential qs */
139
140
    static void QsSequential(SortType array[], const IndexType size, const infOrEqualPtr infOrEqual){
        QsSequentialStep(array, 0, size-1, infOrEqual);
141
    }
142

143
    static void QsSequential(SortType array[], const IndexType size){
144
        QsSequential(array, size, [](const SortType& v1, const SortType& v2){
145
146
147
            return v1 <= v2;
        });
    }
148

149
150
#ifdef FQS_TASKS_ARE_DISABLED
    static void QsOmp(SortType elements[], const int nbElements, const infOrEqualPtr infOrEqual){
151
        const int nbTasksRequiere = (omp_get_max_threads() * 5);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
152
153
        int deep = 0;
        while( (1 << deep) < nbTasksRequiere ) deep += 1;
154
155
156
157
158
159

        std::vector<TaskInterval> tasks;
        tasks.push_back(TaskInterval(0, nbElements-1, deep));
        int numberOfThreadProceeding = 0;
        omp_lock_t mutexShareVariable;
        omp_init_lock(&mutexShareVariable);
160
161
162

        #pragma omp parallel
        {
163
164
165
166
167
168
169
170
171
172
            bool hasWorkToDo = true;
            while(hasWorkToDo){
                // Ask for the mutex
                omp_set_lock(&mutexShareVariable);
                if(tasks.size()){
                    // There is tasks to proceed
                    const TaskInterval ts(tasks.back());
                    tasks.pop_back();

                    // Does this task should create some other?
BRAMAS Berenger's avatar
BRAMAS Berenger committed
173
                    if(ts.getDeep() == 0){
174
175
                        // No release the mutex and run in seq
                        omp_unset_lock(&mutexShareVariable);
176
                        QsSequentialStep(elements , ts.getLeft(), ts.getRight(), infOrEqual);
177
178
179
180
181
182
183
                    }
                    else{
                        // Yes so inform other and release the mutex
                        numberOfThreadProceeding += 1;
                        omp_unset_lock(&mutexShareVariable);

                        // Partition
184
                        const IndexType part = QsPartition(elements, ts.getLeft(), ts.getRight(), infOrEqual);
185
186
187

                        // Push the new task in the vector
                        omp_set_lock(&mutexShareVariable);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
188
189
                        tasks.push_back(TaskInterval(part+1, ts.getRight(), ts.getDeep()-1));
                        tasks.push_back(TaskInterval(ts.getLeft(), part-1, ts.getDeep()-1));
190
191
192
                        // We create new task but we are not working so inform other
                        numberOfThreadProceeding -= 1;
                        omp_unset_lock(&mutexShareVariable);
193
194
195
                    }
                }
                else{
196
197
198
199
200
201
202
203
                    // There is not task in the vector
                    #pragma omp flush(numberOfThreadProceeding)
                    if(numberOfThreadProceeding == 0){
                        // And there is no thread that may create some tasks so stop here
                        hasWorkToDo = false;
                    }
                    // Release mutex
                    omp_unset_lock(&mutexShareVariable);
204
205
206
207
                }
            }
        }

208
        omp_destroy_lock(&mutexShareVariable);
209
    }
210
#else
211
    /** The openmp quick sort */
212
    static void QsOmp(SortType array[], const IndexType size, const infOrEqualPtr infOrEqual){
213
        const int nbTasksRequiere = (omp_get_max_threads() * 5);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
214
215
        int deep = 0;
        while( (1 << deep) < nbTasksRequiere ) deep += 1;
216

217
218
219
220
        #pragma omp parallel
        {
            #pragma omp single nowait
            {
221
                QsOmpTask(array, 0, size - 1 , deep, infOrEqual);
222
223
224
            }
        }
    }
225
#endif
226
227

    static void QsOmp(SortType array[], const IndexType size){
228
        QsOmp(array, size, [](const SortType& v1, const SortType& v2){
229
230
231
            return v1 <= v2;
        });
    }
232
233
234
};

#endif // FQUICKSORT_HPP