Une nouvelle version du portail de gestion des comptes externes sera mise en production lundi 09 août. Elle permettra d'allonger la validité d'un compte externe jusqu'à 3 ans. Pour plus de détails sur cette version consulter : https://doc-si.inria.fr/x/FCeS

FFmmAlgorithmThread.hpp 16.2 KB
Newer Older
1
// ===================================================================================
2 3 4 5 6 7 8 9
// Logiciel initial: ScalFmm Version 0.5
// Co-auteurs : Olivier Coulaud, Bérenger Bramas.
// Propriétaires : INRIA.
// Copyright © 2011-2012, diffusé sous les termes et conditions d’une licence propriétaire.
// Initial software: ScalFmm Version 0.5
// Co-authors: Olivier Coulaud, Bérenger Bramas.
// Owners: INRIA.
// Copyright © 2011-2012, spread under the terms and conditions of a proprietary license.
10
// ===================================================================================
11 12
#ifndef FFMMALGORITHMTHREAD_HPP
#define FFMMALGORITHMTHREAD_HPP
13

14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38

#include "../Utils/FAssertable.hpp"
#include "../Utils/FDebug.hpp"
#include "../Utils/FTrace.hpp"
#include "../Utils/FTic.hpp"
#include "../Utils/FGlobal.hpp"

#include "../Containers/FOctree.hpp"


#include <omp.h>

/**
* @author Berenger Bramas (berenger.bramas@inria.fr)
* @class FFmmAlgorithmThread
* @brief
* Please read the license
*
* This class is a threaded FMM algorithm
* It just iterates on a tree and call the kernels with good arguments.
* It used the inspector-executor model :
* iterates on the tree and builds an array to work in parallel on this array
*
* Of course this class does not deallocate pointer given in arguements.
*
berenger-bramas's avatar
berenger-bramas committed
39
* When using this algorithm the P2P is thread safe.
40
*/
berenger-bramas's avatar
berenger-bramas committed
41
template<class OctreeClass, class ParticleClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass>
42
class FFmmAlgorithmThread : protected FAssertable{
berenger-bramas's avatar
berenger-bramas committed
43 44
    OctreeClass* const tree;                  //< The octree to work on
    KernelClass** kernels;                    //< The kernels
45

berenger-bramas's avatar
berenger-bramas committed
46
    typename OctreeClass::Iterator* iterArray;
47
    int leafsNumber;
48 49

    static const int SizeShape = 3*3*3;
berenger-bramas's avatar
berenger-bramas committed
50 51 52
    int shapeLeaf[SizeShape];    

    const int MaxThreads;
53

54 55
    const int OctreeHeight;

56 57 58 59 60 61
public:
    /** The constructor need the octree and the kernels used for computation
      * @param inTree the octree to work on
      * @param inKernels the kernels to call
      * An assert is launched if one of the arguments is null
      */
berenger-bramas's avatar
berenger-bramas committed
62
    FFmmAlgorithmThread(OctreeClass* const inTree, KernelClass* const inKernels)
63 64
        : tree(inTree) , kernels(0), iterArray(0), leafsNumber(0),
          MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()) {
65

66
        fassert(tree, "tree cannot be null", __LINE__, __FILE__);
67

berenger-bramas's avatar
berenger-bramas committed
68
        this->kernels = new KernelClass*[MaxThreads];
berenger-bramas's avatar
berenger-bramas committed
69
        for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
berenger-bramas's avatar
berenger-bramas committed
70
            this->kernels[idxThread] = new KernelClass(*inKernels);
71 72
        }

berenger-bramas's avatar
berenger-bramas committed
73
        FDEBUG(FDebug::Controller << "FFmmAlgorithmThread (Max Thread " << omp_get_max_threads() << ")\n");
74 75 76 77
    }

    /** Default destructor */
    virtual ~FFmmAlgorithmThread(){
berenger-bramas's avatar
berenger-bramas committed
78
        for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
79 80
            delete this->kernels[idxThread];
        }
berenger-bramas's avatar
berenger-bramas committed
81
        delete [] this->kernels;
82 83 84 85 86 87 88
    }

    /**
      * To execute the fmm algorithm
      * Call this function to run the complete algorithm
      */
    void execute(){
89
        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) );
90 91 92 93 94 95

        for(int idxShape = 0 ; idxShape < SizeShape ; ++idxShape){
            this->shapeLeaf[idxShape] = 0;
        }

        // Count leaf
96
        leafsNumber = 0;
berenger-bramas's avatar
berenger-bramas committed
97
        typename OctreeClass::Iterator octreeIterator(tree);
98 99
        octreeIterator.gotoBottomLeft();
        do{
100
            ++leafsNumber;
101
            const FTreeCoordinate& coord = octreeIterator.getCurrentCell()->getCoordinate();
102 103 104
            ++this->shapeLeaf[(coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3)];

        } while(octreeIterator.moveRight());
berenger-bramas's avatar
berenger-bramas committed
105
        iterArray = new typename OctreeClass::Iterator[leafsNumber];
106
        fassert(iterArray, "iterArray bad alloc", __LINE__, __FILE__);
107 108

        bottomPass();
109

110 111
        upwardPass();

112 113
        transferPass();

114 115 116 117 118
        downardPass();

        directPass();

        delete [] iterArray;
119
        iterArray = 0;         
120 121
    }

122
private:
berenger-bramas's avatar
berenger-bramas committed
123 124 125 126
    /////////////////////////////////////////////////////////////////////////////
    // P2M
    /////////////////////////////////////////////////////////////////////////////

127 128
    /** P2M */
    void bottomPass(){
129
        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) );
130
        FDEBUG( FDebug::Controller.write("\tStart Bottom Pass\n").write(FDebug::Flush) );
berenger-bramas's avatar
berenger-bramas committed
131
        FDEBUG(FTic counterTime);
132

berenger-bramas's avatar
berenger-bramas committed
133
        typename OctreeClass::Iterator octreeIterator(tree);
134 135 136 137 138 139 140 141
        int leafs = 0;
        // Iterate on leafs
        octreeIterator.gotoBottomLeft();
        do{
            iterArray[leafs] = octreeIterator;
            ++leafs;
        } while(octreeIterator.moveRight());

berenger-bramas's avatar
berenger-bramas committed
142
        FDEBUG(FTic computationCounter);
143
#pragma omp parallel
144
        {
berenger-bramas's avatar
berenger-bramas committed
145
            KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
146
#pragma omp for nowait
147 148 149 150 151 152
            for(int idxLeafs = 0 ; idxLeafs < leafs ; ++idxLeafs){
                // We need the current cell that represent the leaf
                // and the list of particles
                myThreadkernels->P2M( iterArray[idxLeafs].getCurrentCell() , iterArray[idxLeafs].getCurrentListSrc());
            }
        }
berenger-bramas's avatar
berenger-bramas committed
153
        FDEBUG(computationCounter.tac() );
154

155
        FDEBUG( FDebug::Controller << "\tFinished (@Bottom Pass (P2M) = "  << counterTime.tacAndElapsed() << "s)\n" );
156
        FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.elapsed() << " s\n" );
157

158 159
    }

berenger-bramas's avatar
berenger-bramas committed
160 161 162 163
    /////////////////////////////////////////////////////////////////////////////
    // Upward
    /////////////////////////////////////////////////////////////////////////////

164 165
    /** M2M */
    void upwardPass(){
166
        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) );
167
        FDEBUG( FDebug::Controller.write("\tStart Upward Pass\n").write(FDebug::Flush); );
berenger-bramas's avatar
berenger-bramas committed
168 169
        FDEBUG(FTic counterTime);
        FDEBUG(FTic computationCounter);
170 171

        // Start from leal level - 1
berenger-bramas's avatar
berenger-bramas committed
172
        typename OctreeClass::Iterator octreeIterator(tree);
173 174
        octreeIterator.gotoBottomLeft();
        octreeIterator.moveUp();
berenger-bramas's avatar
berenger-bramas committed
175
        typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);
176 177 178

        // for each levels
        for(int idxLevel = OctreeHeight - 2 ; idxLevel > 1 ; --idxLevel ){
179
            int numberOfCells = 0;
180 181
            // for each cells
            do{
182 183
                iterArray[numberOfCells] = octreeIterator;
                ++numberOfCells;
184 185 186 187 188
            } while(octreeIterator.moveRight());
            avoidGotoLeftIterator.moveUp();
            octreeIterator = avoidGotoLeftIterator;// equal octreeIterator.moveUp(); octreeIterator.gotoLeft();

            FDEBUG(computationCounter.tic());
189
#pragma omp parallel
190
            {
berenger-bramas's avatar
berenger-bramas committed
191
                KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
192
#pragma omp for nowait
193
                for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){
194 195
                    // We need the current cell and the child
                    // child is an array (of 8 child) that may be null
196
                    myThreadkernels->M2M( iterArray[idxCell].getCurrentCell() , iterArray[idxCell].getCurrentChild(), idxLevel);
197 198
                }
            }
berenger-bramas's avatar
berenger-bramas committed
199

200 201 202
            FDEBUG(computationCounter.tac());
        }

berenger-bramas's avatar
berenger-bramas committed
203

204
        FDEBUG( FDebug::Controller << "\tFinished (@Upward Pass (M2M) = "  << counterTime.tacAndElapsed() << "s)\n" );
berenger-bramas's avatar
berenger-bramas committed
205
        FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" );
206

207 208
    }

berenger-bramas's avatar
berenger-bramas committed
209
    /////////////////////////////////////////////////////////////////////////////
210
    // Transfer
berenger-bramas's avatar
berenger-bramas committed
211 212
    /////////////////////////////////////////////////////////////////////////////

213
    /** M2L L2L */
214
    void transferPass(){
215
        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) );
216

217 218 219
        FDEBUG( FDebug::Controller.write("\tStart Downward Pass (M2L)\n").write(FDebug::Flush); );
        FDEBUG(FTic counterTime);
        FDEBUG(FTic computationCounter);
berenger-bramas's avatar
berenger-bramas committed
220

221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236
        typename OctreeClass::Iterator octreeIterator(tree);
        octreeIterator.moveDown();
        typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);

        // for each levels
        for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
            int numberOfCells = 0;
            // for each cells
            do{
                iterArray[numberOfCells] = octreeIterator;
                ++numberOfCells;
            } while(octreeIterator.moveRight());
            avoidGotoLeftIterator.moveDown();
            octreeIterator = avoidGotoLeftIterator;

            FDEBUG(computationCounter.tic());
237
            #pragma omp parallel
238 239
            {
                KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
240
                const CellClass* neighbors[343];
241

242
                #pragma omp for  schedule(dynamic) nowait
243
                for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){
244
                    const int counter = tree->getInteractionNeighbors(neighbors,  iterArray[idxCell].getCurrentGlobalCoordinate(),idxLevel);
245
                    if(counter) myThreadkernels->M2L( iterArray[idxCell].getCurrentCell() , neighbors, counter, idxLevel);
246
                }
247 248 249 250

                FDEBUG(computationCounter.tic());
                myThreadkernels->finishedLevelM2L(idxLevel);
                FDEBUG(computationCounter.tac());
251
            }
252
            FDEBUG(computationCounter.tac());
253 254
        }

255 256 257
        FDEBUG( FDebug::Controller << "\tFinished (@Downward Pass (M2L) = "  << counterTime.tacAndElapsed() << "s)\n" );
        FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" );
    }
berenger-bramas's avatar
berenger-bramas committed
258

259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287
    /////////////////////////////////////////////////////////////////////////////
    // Downard
    /////////////////////////////////////////////////////////////////////////////

    void downardPass(){ // second L2L
        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) );

        FDEBUG( FDebug::Controller.write("\tStart Downward Pass (L2L)\n").write(FDebug::Flush); );
        FDEBUG(FTic counterTime);
        FDEBUG(FTic computationCounter);

        typename OctreeClass::Iterator octreeIterator(tree);
        octreeIterator.moveDown();

        typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);

        const int heightMinusOne = OctreeHeight - 1;
        // for each levels exepted leaf level
        for(int idxLevel = 2 ; idxLevel < heightMinusOne ; ++idxLevel ){
            int numberOfCells = 0;
            // for each cells
            do{
                iterArray[numberOfCells] = octreeIterator;
                ++numberOfCells;
            } while(octreeIterator.moveRight());
            avoidGotoLeftIterator.moveDown();
            octreeIterator = avoidGotoLeftIterator;

            FDEBUG(computationCounter.tic());
288
            #pragma omp parallel
289 290
            {
                KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
291
                #pragma omp for nowait
292 293
                for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){
                    myThreadkernels->L2L( iterArray[idxCell].getCurrentCell() , iterArray[idxCell].getCurrentChild(), idxLevel);
294 295
                }
            }
296
            FDEBUG(computationCounter.tac());
297 298
        }

299 300
        FDEBUG( FDebug::Controller << "\tFinished (@Downward Pass (L2L) = "  << counterTime.tacAndElapsed() << "s)\n" );
        FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" );
301 302
    }

303 304


berenger-bramas's avatar
berenger-bramas committed
305 306 307 308
    /////////////////////////////////////////////////////////////////////////////
    // Direct
    /////////////////////////////////////////////////////////////////////////////

309 310
    /** P2P */
    void directPass(){
311
        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) );
312
        FDEBUG( FDebug::Controller.write("\tStart Direct Pass\n").write(FDebug::Flush); );
berenger-bramas's avatar
berenger-bramas committed
313 314
        FDEBUG(FTic counterTime);
        FDEBUG(FTic computationCounter);
315
        FDEBUG(FTic computationCounterP2P);
316

317
        omp_lock_t lockShape[SizeShape];
318
        for(int idxShape = 0 ; idxShape < SizeShape ; ++idxShape){
319
            omp_init_lock(&lockShape[idxShape]);
320 321
        }

322 323 324
        struct LeafData{
            MortonIndex index;
            CellClass* cell;
berenger-bramas's avatar
berenger-bramas committed
325 326
            ContainerClass* targets;
            ContainerClass* sources;
327 328 329
        };
        LeafData* const leafsDataArray = new LeafData[this->leafsNumber];

330
        const int LeafIndex = OctreeHeight - 1;
331 332 333 334 335 336 337

        int startPosAtShape[SizeShape];
        startPosAtShape[0] = 0;
        for(int idxShape = 1 ; idxShape < SizeShape ; ++idxShape){
            startPosAtShape[idxShape] = startPosAtShape[idxShape-1] + this->shapeLeaf[idxShape-1];
        }

338
        #pragma omp parallel
339
        {
340

341 342 343
            const float step = float(this->leafsNumber) / float(omp_get_num_threads());
            const int start = int(FMath::Ceil(step * float(omp_get_thread_num())));
            const int tempEnd = int(FMath::Ceil(step * float(omp_get_thread_num()+1)));
344
            const int end = (tempEnd > this->leafsNumber ? this->leafsNumber : tempEnd);
345

berenger-bramas's avatar
berenger-bramas committed
346
            typename OctreeClass::Iterator octreeIterator(tree);
347
            octreeIterator.gotoBottomLeft();
348 349 350 351 352

            for(int idxPreLeaf = 0 ; idxPreLeaf < start ; ++idxPreLeaf){
                octreeIterator.moveRight();
            }

353
            // for each leafs
354
            for(int idxMyLeafs = start ; idxMyLeafs < end ; ++idxMyLeafs){
355
                //iterArray[leafs] = octreeIterator;
356
                //++leafs;
357
                const FTreeCoordinate& coord = octreeIterator.getCurrentGlobalCoordinate();
358
                const int shapePosition = (coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3);
359

360 361 362
                omp_set_lock(&lockShape[shapePosition]);
                const int positionToWork = startPosAtShape[shapePosition]++;
                omp_unset_lock(&lockShape[shapePosition]);
363

364 365 366 367
                leafsDataArray[positionToWork].index = octreeIterator.getCurrentGlobalIndex();
                leafsDataArray[positionToWork].cell = octreeIterator.getCurrentCell();
                leafsDataArray[positionToWork].targets = octreeIterator.getCurrentListTargets();
                leafsDataArray[positionToWork].sources = octreeIterator.getCurrentListSrc();
368

369 370
                octreeIterator.moveRight();
            }
371

372
            #pragma omp barrier
373

berenger-bramas's avatar
berenger-bramas committed
374
            FDEBUG(if(!omp_get_thread_num()) computationCounter.tic());
375

berenger-bramas's avatar
berenger-bramas committed
376
            KernelClass& myThreadkernels = (*kernels[omp_get_thread_num()]);
377
            // There is a maximum of 26 neighbors
berenger-bramas's avatar
berenger-bramas committed
378
            ContainerClass* neighbors[27];
379
            int previous = 0;
380 381

            for(int idxShape = 0 ; idxShape < SizeShape ; ++idxShape){
382
                const int endAtThisShape = this->shapeLeaf[idxShape] + previous;
383

384
                #pragma omp for schedule(dynamic)
385 386 387
                for(int idxLeafs = previous ; idxLeafs < endAtThisShape ; ++idxLeafs){
                    LeafData& currentIter = leafsDataArray[idxLeafs];
                    myThreadkernels.L2P(currentIter.cell, currentIter.targets);
388
                    // need the current particles and neighbors particles
389
                    FDEBUG(if(!omp_get_thread_num()) computationCounterP2P.tic());
390
                    const int counter = tree->getLeafsNeighbors(neighbors, currentIter.cell->getCoordinate(), LeafIndex);
391 392
                    myThreadkernels.P2P(currentIter.cell->getCoordinate(), currentIter.targets,
                                        currentIter.sources, neighbors, counter);
393
                    FDEBUG(if(!omp_get_thread_num()) computationCounterP2P.tac());
394
                }
395 396

                previous = endAtThisShape;
397 398
            }
        }
399

400 401
        FDEBUG(computationCounter.tac());

402
        delete [] leafsDataArray;
403 404 405 406
        for(int idxShape = 0 ; idxShape < SizeShape ; ++idxShape){
            omp_destroy_lock(&lockShape[idxShape]);
        }

407

408 409
        FDEBUG( FDebug::Controller << "\tFinished (@Direct Pass (L2P + P2P) = "  << counterTime.tacAndElapsed() << "s)\n" );
        FDEBUG( FDebug::Controller << "\t\t Computation L2P + P2P : " << computationCounter.cumulated() << " s\n" );
410
        FDEBUG( FDebug::Controller << "\t\t Computation P2P : " << computationCounterP2P.cumulated() << " s\n" );
411

412 413 414 415 416 417 418
    }

};


#endif //FFMMALGORITHMTHREAD_HPP

419