Commit 50da648e authored by berenger-bramas

Update the openmp quick sort with a well made custom barrier.

git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/scalfmm/scalfmm/trunk@218 2616d619-271b-44dc-8df4-d4a8f33a7222
parent 79038ca6
#ifndef FOMPBARRIER_HPP
#define FOMPBARRIER_HPP
#include <omp.h>
#include <climits>
/** This class is a custom OpenMP barrier.
  * OpenMP only provides a global barrier, so we need
  * to be able to perform a barrier operation between a
  * group of threads only.
  */
class FOmpBarrier {
private:
    int nbThreads;          //< The number of threads for this barrier
    int currentNbThread;    //< The current number of threads waiting
    bool sense;             //< Direct barrier feedback protection
    omp_lock_t mutex;       //< To have an atomic int

    FOmpBarrier(FOmpBarrier&){}
    FOmpBarrier& operator=(FOmpBarrier&){return *this;}

public:
    /** Constructor with the number of threads */
    FOmpBarrier(const int inNbThreads = INT_MAX)
        : nbThreads(inNbThreads), currentNbThread(0), sense(false) {
        omp_init_lock( &mutex );
    }

    /** Destructor, release the omp lock */
    ~FOmpBarrier(){
        omp_destroy_lock( &mutex );
    }
    /** Perform a barrier: block until nbThreads threads have called wait() */
    void wait(){
        const bool mySense = sense;
        omp_set_lock( &mutex );
        const int nbThreadsArrived = (++currentNbThread);
        omp_unset_lock( &mutex );

        if(nbThreadsArrived == nbThreads) {
            // Last thread to arrive: reset the counter and flip the
            // sense flag to release the spinning threads.
            currentNbThread = 0;
            sense = !sense;
            #pragma omp flush(sense)
        }
        else {
            // Spin until the last thread reverses the sense flag.
            volatile const bool* const ptSense = &sense;
            while( (*ptSense) == mySense){
            }
        }
    }
    /** Change the number of threads */
    void setNbThreads(const int inNbThread){
        omp_set_lock( &mutex );
        nbThreads = inNbThread;
        omp_unset_lock( &mutex );
    }
};
#endif // FOMPBARRIER_HPP
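For context, a minimal usage sketch (not part of the commit) of how such a group barrier can synchronize only a subset of the threads of a parallel region; the driver, group size and thread ids below are illustrative assumptions, not code from the repository.

#include <omp.h>
#include <cstdio>
#include "FOmpBarrier.hpp"

int main(){
    // Default-constructed with INT_MAX threads; the real group size is set
    // below, before the configuring thread calls wait() itself.
    FOmpBarrier groupBarrier;

    #pragma omp parallel num_threads(4)
    {
        const int myThreadId = omp_get_thread_num();

        // Only threads 0, 1 and 2 take part in this barrier;
        // thread 3 is never blocked by it.
        if(myThreadId <= 2){
            if(myThreadId == 0){
                groupBarrier.setNbThreads(3);
            }
            // ... first phase of work for this group ...
            groupBarrier.wait();
            // ... second phase, entered only once the whole group arrived ...
            printf("Thread %d passed the group barrier\n", myThreadId);
        }
    }
    return 0;
}

This mirrors the pattern used later in FQuickSort: the first thread of a group configures the barrier with setNbThreads() before waiting on it, so the last arrival always compares the counter against the correct group size.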
@@ -9,9 +9,11 @@
#include <mpi.h>
#include "../Utils/FGlobal.hpp"
#include "../Utils/FMemUtils.hpp"
#include "../Utils/FTrace.hpp"
#include "FGlobal.hpp"
#include "FMemUtils.hpp"
#include "FTrace.hpp"
#include "FOmpBarrier.hpp"
class FQuickSort {
////////////////////////////////////////////////////////////
@@ -61,37 +63,6 @@ class FQuickSort {
return int(double(position)/step);
}
////////////////////////////////////////////////////////////
// OMP Function
////////////////////////////////////////////////////////////
/* custom barrier to wait proc from first to last, not all threads! */
static void OmpBarrier(int mutex[], const int firstProc, const int lastProc, const int myThreadId){
if(lastProc != firstProc){
const int idRelative = myThreadId - firstProc;
while(mutex[firstProc] != idRelative ){
#pragma omp flush(mutex)
}
++mutex[firstProc];
#pragma omp flush(mutex)
if(myThreadId == lastProc){
mutex[firstProc] = idRelative - 1;
}
else{
while(mutex[firstProc] != idRelative ){
#pragma omp flush(mutex)
}
if(idRelative != 0){
--mutex[firstProc];
#pragma omp flush(mutex)
}
}
}
}
////////////////////////////////////////////////////////////
// MPI Function
@@ -100,7 +71,7 @@ class FQuickSort {
/** generic mpi assert function */
static void mpiassert(const int test, const unsigned line, const char* const message = 0){
if(test != MPI_SUCCESS){
printf("[ERROR] Test failled at line %d, result is %d", line, test);
printf("[ERROR-QS] Test failled at line %d, result is %d", line, test);
if(message) printf(", message: %s",message);
printf("\n");
fflush(stdout);
@@ -239,8 +210,7 @@ public:
SortType*const temporaryArray = reinterpret_cast<SortType*>(new char[sizeof(SortType) * size]);
int mutex[NbOfThreads];
memset(mutex, 0, sizeof(int) * NbOfThreads);
FOmpBarrier barriers[NbOfThreads];
#pragma omp parallel
{
@@ -259,15 +229,19 @@ public:
Fix* const fixesSum = &allFixesSum[0][firstProc];
const FSize nbElements = endIndex - startIndex + 1;
if(myThreadId == firstProc){
barriers[firstProc].setNbThreads( lastProc - firstProc + 1);
}
// sort QsLocal part of the array
const PivotType pivot = (PivotType(array[startIndex]) + PivotType(array[endIndex]) )/2;
OmpBarrier( mutex, firstProc, lastProc, myThreadId);
barriers[firstProc].wait();
QsLocal(array, pivot, myLeft, myRight, fixes[myThreadId].pre, fixes[myThreadId].suf);
// wait others that work on this part
#pragma omp flush(array)
OmpBarrier( mutex, firstProc, lastProc, myThreadId);
barriers[firstProc].wait();
// merge result
if(myThreadId == firstProc){
@@ -285,7 +259,7 @@ public:
#pragma omp flush(temporaryArray)
}
OmpBarrier( mutex, firstProc, lastProc, myThreadId);
barriers[firstProc].wait();
// copy my result where it belong (< pivot)
FMemUtils::memcpy(&array[startIndex + fixesSum[myThreadId].pre], &temporaryArray[myLeft], sizeof(SortType) * fixes[myThreadId].pre);
@@ -294,7 +268,7 @@ public:
const FSize sufoffset = fixesSum[lastProc + 1].pre + startIndex;
FMemUtils::memcpy(&array[sufoffset + fixesSum[myThreadId].suf], &temporaryArray[myLeft + fixes[myThreadId].pre ], sizeof(SortType) * fixes[myThreadId].suf);
OmpBarrier( mutex, firstProc, lastProc, myThreadId);
barriers[firstProc].wait();
// find my next QsLocal part
int splitProc = getProc(sufoffset - startIndex, nbElements, lastProc - firstProc + 1) + firstProc;