FFmmAlgorithmThread.hpp 16 KB
Newer Older
1
// ===================================================================================
2 3 4 5 6 7 8 9
// Logiciel initial: ScalFmm Version 0.5
// Co-auteurs : Olivier Coulaud, Bérenger Bramas.
// Propriétaires : INRIA.
// Copyright © 2011-2012, diffusé sous les termes et conditions d’une licence propriétaire.
// Initial software: ScalFmm Version 0.5
// Co-authors: Olivier Coulaud, Bérenger Bramas.
// Owners: INRIA.
// Copyright © 2011-2012, spread under the terms and conditions of a proprietary license.
10
// ===================================================================================
11 12
#ifndef FFMMALGORITHMTHREAD_HPP
#define FFMMALGORITHMTHREAD_HPP
13

14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38

#include "../Utils/FAssertable.hpp"
#include "../Utils/FDebug.hpp"
#include "../Utils/FTrace.hpp"
#include "../Utils/FTic.hpp"
#include "../Utils/FGlobal.hpp"

#include "../Containers/FOctree.hpp"


#include <omp.h>

/**
* @author Berenger Bramas (berenger.bramas@inria.fr)
* @class FFmmAlgorithmThread
* @brief
* Please read the license
*
* This class is a threaded FMM algorithm
* It just iterates on a tree and call the kernels with good arguments.
* It used the inspector-executor model :
* iterates on the tree and builds an array to work in parallel on this array
*
* Of course this class does not deallocate pointer given in arguements.
*
berenger-bramas's avatar
berenger-bramas committed
39
* When using this algorithm the P2P is thread safe.
40
*/
berenger-bramas's avatar
berenger-bramas committed
41
template<class OctreeClass, class ParticleClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass>
42
class FFmmAlgorithmThread : protected FAssertable{
berenger-bramas's avatar
berenger-bramas committed
43 44
    OctreeClass* const tree;                  //< The octree to work on
    KernelClass** kernels;                    //< The kernels
45

berenger-bramas's avatar
berenger-bramas committed
46
    typename OctreeClass::Iterator* iterArray;
47
    int leafsNumber;
48 49

    static const int SizeShape = 3*3*3;
berenger-bramas's avatar
berenger-bramas committed
50 51 52
    int shapeLeaf[SizeShape];    

    const int MaxThreads;
53

54 55
    const int OctreeHeight;

56 57 58 59 60 61
public:
    /** The constructor need the octree and the kernels used for computation
      * @param inTree the octree to work on
      * @param inKernels the kernels to call
      * An assert is launched if one of the arguments is null
      */
berenger-bramas's avatar
berenger-bramas committed
62
    FFmmAlgorithmThread(OctreeClass* const inTree, KernelClass* const inKernels)
63 64
        : tree(inTree) , kernels(0), iterArray(0), leafsNumber(0),
          MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()) {
65

66
        fassert(tree, "tree cannot be null", __LINE__, __FILE__);
67

berenger-bramas's avatar
berenger-bramas committed
68
        this->kernels = new KernelClass*[MaxThreads];
berenger-bramas's avatar
berenger-bramas committed
69
        for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
berenger-bramas's avatar
berenger-bramas committed
70
            this->kernels[idxThread] = new KernelClass(*inKernels);
71 72
        }

berenger-bramas's avatar
berenger-bramas committed
73
        FDEBUG(FDebug::Controller << "FFmmAlgorithmThread (Max Thread " << omp_get_max_threads() << ")\n");
74 75 76 77
    }

    /** Default destructor */
    virtual ~FFmmAlgorithmThread(){
berenger-bramas's avatar
berenger-bramas committed
78
        for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
79 80
            delete this->kernels[idxThread];
        }
berenger-bramas's avatar
berenger-bramas committed
81
        delete [] this->kernels;
82 83 84 85 86 87 88
    }

    /**
      * To execute the fmm algorithm
      * Call this function to run the complete algorithm
      */
    void execute(){
89
        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) );
90 91 92 93 94 95

        for(int idxShape = 0 ; idxShape < SizeShape ; ++idxShape){
            this->shapeLeaf[idxShape] = 0;
        }

        // Count leaf
96
        leafsNumber = 0;
berenger-bramas's avatar
berenger-bramas committed
97
        typename OctreeClass::Iterator octreeIterator(tree);
98 99
        octreeIterator.gotoBottomLeft();
        do{
100
            ++leafsNumber;
101
            const FTreeCoordinate& coord = octreeIterator.getCurrentCell()->getCoordinate();
102 103 104
            ++this->shapeLeaf[(coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3)];

        } while(octreeIterator.moveRight());
berenger-bramas's avatar
berenger-bramas committed
105
        iterArray = new typename OctreeClass::Iterator[leafsNumber];
106
        fassert(iterArray, "iterArray bad alloc", __LINE__, __FILE__);
107 108

        bottomPass();
109

110 111
        upwardPass();

112 113
        transferPass();

114 115 116 117 118
        downardPass();

        directPass();

        delete [] iterArray;
119
        iterArray = 0;         
120 121
    }

122
private:
berenger-bramas's avatar
berenger-bramas committed
123 124 125 126
    /////////////////////////////////////////////////////////////////////////////
    // P2M
    /////////////////////////////////////////////////////////////////////////////

127 128
    /** P2M */
    void bottomPass(){
129
        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) );
130
        FDEBUG( FDebug::Controller.write("\tStart Bottom Pass\n").write(FDebug::Flush) );
berenger-bramas's avatar
berenger-bramas committed
131
        FDEBUG(FTic counterTime);
132

berenger-bramas's avatar
berenger-bramas committed
133
        typename OctreeClass::Iterator octreeIterator(tree);
134 135 136 137 138 139 140 141
        int leafs = 0;
        // Iterate on leafs
        octreeIterator.gotoBottomLeft();
        do{
            iterArray[leafs] = octreeIterator;
            ++leafs;
        } while(octreeIterator.moveRight());

berenger-bramas's avatar
berenger-bramas committed
142
        FDEBUG(FTic computationCounter);
143
#pragma omp parallel
144
        {
berenger-bramas's avatar
berenger-bramas committed
145
            KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
146
#pragma omp for nowait
147 148 149 150 151 152
            for(int idxLeafs = 0 ; idxLeafs < leafs ; ++idxLeafs){
                // We need the current cell that represent the leaf
                // and the list of particles
                myThreadkernels->P2M( iterArray[idxLeafs].getCurrentCell() , iterArray[idxLeafs].getCurrentListSrc());
            }
        }
berenger-bramas's avatar
berenger-bramas committed
153
        FDEBUG(computationCounter.tac() );
154

155
        FDEBUG( FDebug::Controller << "\tFinished (@Bottom Pass (P2M) = "  << counterTime.tacAndElapsed() << "s)\n" );
156
        FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.elapsed() << " s\n" );
157

158 159
    }

berenger-bramas's avatar
berenger-bramas committed
160 161 162 163
    /////////////////////////////////////////////////////////////////////////////
    // Upward
    /////////////////////////////////////////////////////////////////////////////

164 165
    /** M2M */
    void upwardPass(){
166
        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) );
167
        FDEBUG( FDebug::Controller.write("\tStart Upward Pass\n").write(FDebug::Flush); );
berenger-bramas's avatar
berenger-bramas committed
168 169
        FDEBUG(FTic counterTime);
        FDEBUG(FTic computationCounter);
170 171

        // Start from leal level - 1
berenger-bramas's avatar
berenger-bramas committed
172
        typename OctreeClass::Iterator octreeIterator(tree);
173 174
        octreeIterator.gotoBottomLeft();
        octreeIterator.moveUp();
berenger-bramas's avatar
berenger-bramas committed
175
        typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);
176 177 178

        // for each levels
        for(int idxLevel = OctreeHeight - 2 ; idxLevel > 1 ; --idxLevel ){
179
            int numberOfCells = 0;
180 181
            // for each cells
            do{
182 183
                iterArray[numberOfCells] = octreeIterator;
                ++numberOfCells;
184 185 186 187 188
            } while(octreeIterator.moveRight());
            avoidGotoLeftIterator.moveUp();
            octreeIterator = avoidGotoLeftIterator;// equal octreeIterator.moveUp(); octreeIterator.gotoLeft();

            FDEBUG(computationCounter.tic());
189
#pragma omp parallel
190
            {
berenger-bramas's avatar
berenger-bramas committed
191
                KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
192
#pragma omp for nowait
193
                for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){
194 195
                    // We need the current cell and the child
                    // child is an array (of 8 child) that may be null
196
                    myThreadkernels->M2M( iterArray[idxCell].getCurrentCell() , iterArray[idxCell].getCurrentChild(), idxLevel);
197 198
                }
            }
berenger-bramas's avatar
berenger-bramas committed
199

200 201 202
            FDEBUG(computationCounter.tac());
        }

berenger-bramas's avatar
berenger-bramas committed
203

204
        FDEBUG( FDebug::Controller << "\tFinished (@Upward Pass (M2M) = "  << counterTime.tacAndElapsed() << "s)\n" );
berenger-bramas's avatar
berenger-bramas committed
205
        FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" );
206

207 208
    }

berenger-bramas's avatar
berenger-bramas committed
209
    /////////////////////////////////////////////////////////////////////////////
210
    // Transfer
berenger-bramas's avatar
berenger-bramas committed
211 212
    /////////////////////////////////////////////////////////////////////////////

213
    /** M2L L2L */
214
    void transferPass(){
215
        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) );
216

217 218 219
        FDEBUG( FDebug::Controller.write("\tStart Downward Pass (M2L)\n").write(FDebug::Flush); );
        FDEBUG(FTic counterTime);
        FDEBUG(FTic computationCounter);
berenger-bramas's avatar
berenger-bramas committed
220

221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239
        typename OctreeClass::Iterator octreeIterator(tree);
        octreeIterator.moveDown();
        typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);

        // for each levels
        for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
            int numberOfCells = 0;
            // for each cells
            do{
                iterArray[numberOfCells] = octreeIterator;
                ++numberOfCells;
            } while(octreeIterator.moveRight());
            avoidGotoLeftIterator.moveDown();
            octreeIterator = avoidGotoLeftIterator;

            FDEBUG(computationCounter.tic());
#pragma omp parallel
            {
                KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
240
                const CellClass* neighbors[343];
241 242 243

#pragma omp for  schedule(dynamic) nowait
                for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){
244
                    const int counter = tree->getInteractionNeighbors(neighbors,  iterArray[idxCell].getCurrentGlobalCoordinate(),idxLevel);
245
                    if(counter) myThreadkernels->M2L( iterArray[idxCell].getCurrentCell() , neighbors, counter, idxLevel);
246 247
                }
            }
248
            FDEBUG(computationCounter.tac());
249 250
        }

251 252 253
        FDEBUG( FDebug::Controller << "\tFinished (@Downward Pass (M2L) = "  << counterTime.tacAndElapsed() << "s)\n" );
        FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" );
    }
berenger-bramas's avatar
berenger-bramas committed
254

255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289
    /////////////////////////////////////////////////////////////////////////////
    // Downard
    /////////////////////////////////////////////////////////////////////////////

    void downardPass(){ // second L2L
        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) );

        FDEBUG( FDebug::Controller.write("\tStart Downward Pass (L2L)\n").write(FDebug::Flush); );
        FDEBUG(FTic counterTime);
        FDEBUG(FTic computationCounter);

        typename OctreeClass::Iterator octreeIterator(tree);
        octreeIterator.moveDown();

        typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);

        const int heightMinusOne = OctreeHeight - 1;
        // for each levels exepted leaf level
        for(int idxLevel = 2 ; idxLevel < heightMinusOne ; ++idxLevel ){
            int numberOfCells = 0;
            // for each cells
            do{
                iterArray[numberOfCells] = octreeIterator;
                ++numberOfCells;
            } while(octreeIterator.moveRight());
            avoidGotoLeftIterator.moveDown();
            octreeIterator = avoidGotoLeftIterator;

            FDEBUG(computationCounter.tic());
#pragma omp parallel
            {
                KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
#pragma omp for nowait
                for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){
                    myThreadkernels->L2L( iterArray[idxCell].getCurrentCell() , iterArray[idxCell].getCurrentChild(), idxLevel);
290 291
                }
            }
292
            FDEBUG(computationCounter.tac());
293 294
        }

295 296
        FDEBUG( FDebug::Controller << "\tFinished (@Downward Pass (L2L) = "  << counterTime.tacAndElapsed() << "s)\n" );
        FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" );
297 298
    }

299 300


berenger-bramas's avatar
berenger-bramas committed
301 302 303 304
    /////////////////////////////////////////////////////////////////////////////
    // Direct
    /////////////////////////////////////////////////////////////////////////////

305 306
    /** P2P */
    void directPass(){
307
        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) );
308
        FDEBUG( FDebug::Controller.write("\tStart Direct Pass\n").write(FDebug::Flush); );
berenger-bramas's avatar
berenger-bramas committed
309 310
        FDEBUG(FTic counterTime);
        FDEBUG(FTic computationCounter);
311
        FDEBUG(FTic computationCounterP2P);
312

313
        omp_lock_t lockShape[SizeShape];
314
        for(int idxShape = 0 ; idxShape < SizeShape ; ++idxShape){
315
            omp_init_lock(&lockShape[idxShape]);
316 317
        }

318 319 320
        struct LeafData{
            MortonIndex index;
            CellClass* cell;
berenger-bramas's avatar
berenger-bramas committed
321 322
            ContainerClass* targets;
            ContainerClass* sources;
323 324 325
        };
        LeafData* const leafsDataArray = new LeafData[this->leafsNumber];

326
        const int LeafIndex = OctreeHeight - 1;
327 328 329 330 331 332 333

        int startPosAtShape[SizeShape];
        startPosAtShape[0] = 0;
        for(int idxShape = 1 ; idxShape < SizeShape ; ++idxShape){
            startPosAtShape[idxShape] = startPosAtShape[idxShape-1] + this->shapeLeaf[idxShape-1];
        }

334
        #pragma omp parallel
335
        {
336

337 338 339
            const float step = float(this->leafsNumber) / float(omp_get_num_threads());
            const int start = int(FMath::Ceil(step * float(omp_get_thread_num())));
            const int tempEnd = int(FMath::Ceil(step * float(omp_get_thread_num()+1)));
340
            const int end = (tempEnd > this->leafsNumber ? this->leafsNumber : tempEnd);
341

berenger-bramas's avatar
berenger-bramas committed
342
            typename OctreeClass::Iterator octreeIterator(tree);
343
            octreeIterator.gotoBottomLeft();
344 345 346 347 348

            for(int idxPreLeaf = 0 ; idxPreLeaf < start ; ++idxPreLeaf){
                octreeIterator.moveRight();
            }

349
            // for each leafs
350
            for(int idxMyLeafs = start ; idxMyLeafs < end ; ++idxMyLeafs){
351
                //iterArray[leafs] = octreeIterator;
352
                //++leafs;
353
                const FTreeCoordinate& coord = octreeIterator.getCurrentGlobalCoordinate();
354
                const int shapePosition = (coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3);
355

356 357 358
                omp_set_lock(&lockShape[shapePosition]);
                const int positionToWork = startPosAtShape[shapePosition]++;
                omp_unset_lock(&lockShape[shapePosition]);
359

360 361 362 363
                leafsDataArray[positionToWork].index = octreeIterator.getCurrentGlobalIndex();
                leafsDataArray[positionToWork].cell = octreeIterator.getCurrentCell();
                leafsDataArray[positionToWork].targets = octreeIterator.getCurrentListTargets();
                leafsDataArray[positionToWork].sources = octreeIterator.getCurrentListSrc();
364

365 366
                octreeIterator.moveRight();
            }
367

368
            #pragma omp barrier
369

berenger-bramas's avatar
berenger-bramas committed
370
            FDEBUG(if(!omp_get_thread_num()) computationCounter.tic());
371

berenger-bramas's avatar
berenger-bramas committed
372
            KernelClass& myThreadkernels = (*kernels[omp_get_thread_num()]);
373
            // There is a maximum of 26 neighbors
berenger-bramas's avatar
berenger-bramas committed
374
            ContainerClass* neighbors[27];
375
            int previous = 0;
376 377

            for(int idxShape = 0 ; idxShape < SizeShape ; ++idxShape){
378
                const int endAtThisShape = this->shapeLeaf[idxShape] + previous;
379

380
                #pragma omp for schedule(dynamic)
381 382 383
                for(int idxLeafs = previous ; idxLeafs < endAtThisShape ; ++idxLeafs){
                    LeafData& currentIter = leafsDataArray[idxLeafs];
                    myThreadkernels.L2P(currentIter.cell, currentIter.targets);
384
                    // need the current particles and neighbors particles
385
                    FDEBUG(if(!omp_get_thread_num()) computationCounterP2P.tic());
386
                    const int counter = tree->getLeafsNeighbors(neighbors, currentIter.cell->getCoordinate(), LeafIndex);
387 388
                    myThreadkernels.P2P(currentIter.cell->getCoordinate(), currentIter.targets,
                                        currentIter.sources, neighbors, counter);
389
                    FDEBUG(if(!omp_get_thread_num()) computationCounterP2P.tac());
390
                }
391 392

                previous = endAtThisShape;
393 394
            }
        }
395

396 397
        FDEBUG(computationCounter.tac());

398
        delete [] leafsDataArray;
399 400 401 402
        for(int idxShape = 0 ; idxShape < SizeShape ; ++idxShape){
            omp_destroy_lock(&lockShape[idxShape]);
        }

403

404 405
        FDEBUG( FDebug::Controller << "\tFinished (@Direct Pass (L2P + P2P) = "  << counterTime.tacAndElapsed() << "s)\n" );
        FDEBUG( FDebug::Controller << "\t\t Computation L2P + P2P : " << computationCounter.cumulated() << " s\n" );
406
        FDEBUG( FDebug::Controller << "\t\t Computation P2P : " << computationCounterP2P.cumulated() << " s\n" );
407

408 409 410 411 412 413 414
    }

};


#endif //FFMMALGORITHMTHREAD_HPP

415