Attention une mise à jour du serveur va être effectuée le vendredi 16 avril entre 12h et 12h30. Cette mise à jour va générer une interruption du service de quelques minutes.

FFmmAlgorithmThreadProc.hpp 38 KB
Newer Older
1 2 3 4 5 6 7 8 9 10
#ifndef FFMMALGORITHMTHREADPROC_HPP
#define FFMMALGORITHMTHREADPROC_HPP
// /!\ Please, you must read the license at the bottom of this page

#include "../Utils/FAssertable.hpp"
#include "../Utils/FDebug.hpp"
#include "../Utils/FTrace.hpp"
#include "../Utils/FTic.hpp"
#include "../Utils/FGlobal.hpp"

11
#include "../Containers/FBoolArray.hpp"
12
#include "../Containers/FOctree.hpp"
13
#include "../Containers/FBufferVector.hpp"
14

berenger-bramas's avatar
berenger-bramas committed
15
#include "../Utils/FMpi.hpp"
16 17 18 19 20 21 22 23 24

#include <omp.h>

/**
* @author Berenger Bramas (berenger.bramas@inria.fr)
* @class FFmmAlgorithmThreadProc
* @brief
* Please read the license
*
berenger-bramas's avatar
berenger-bramas committed
25
* This class is a threaded FMM algorithm with mpi.
26 27 28 29 30 31 32
* It just iterates on a tree and call the kernels with good arguments.
* It used the inspector-executor model :
* iterates on the tree and builds an array to work in parallel on this array
*
* Of course this class does not deallocate pointer given in arguements.
*
* Threaded & based on the inspector-executor model
berenger-bramas's avatar
berenger-bramas committed
33 34
* schedule(runtime) export OMP_NUM_THREADS=2
* export OMPI_CXX=`which g++-4.4`
35 36 37
* mpirun -np 2 valgrind --suppressions=/usr/share/openmpi/openmpi-valgrind.supp
* --tool=memcheck --leak-check=yes --show-reachable=yes --num-callers=20 --track-fds=yes
* ./Tests/testFmmAlgorithmProc ../Data/testLoaderSmall.fma.tmp
38 39
*/
template<template< class ParticleClass, class CellClass, int OctreeHeight> class KernelClass,
berenger-bramas's avatar
berenger-bramas committed
40 41 42
class ParticleClass, class CellClass,
template<class ParticleClass> class LeafClass,
int OctreeHeight, int SubtreeHeight>
berenger-bramas's avatar
berenger-bramas committed
43
        class FFmmAlgorithmThreadProc : protected FAssertable {
44 45 46 47 48
    // To reduce the size of variable type based on foctree in this file
    typedef FOctree<ParticleClass, CellClass, LeafClass, OctreeHeight, SubtreeHeight> Octree;
    typedef typename FOctree<ParticleClass, CellClass,LeafClass, OctreeHeight, SubtreeHeight>::Iterator OctreeIterator;
    typedef KernelClass<ParticleClass, CellClass, OctreeHeight> Kernel;

49 50
    FMpi& app;                          //< The app to communicate

51 52
    Octree* const tree;                 //< The octree to work on
    Kernel** kernels;                   //< The kernels
53

berenger-bramas's avatar
berenger-bramas committed
54 55 56
    OctreeIterator* iterArray;          //< To store the iterator
    OctreeIterator* previousIterArray;  //< To store the previous iterator

berenger-bramas's avatar
berenger-bramas committed
57 58 59
    int leafLeft;                   //< To store the left limit at the previous level
    int leafRight;                  //< To store the right limit at the previous level
    int numberOfLeafs;               //< To store the size at the previous level
60

berenger-bramas's avatar
berenger-bramas committed
61 62
    int leftOffsets[OctreeHeight];      //< the right limit at different level
    int rightOffsets[OctreeHeight];     //< the left limit at different level
63

berenger-bramas's avatar
berenger-bramas committed
64
    const int MaxThreads;               //< the max number of thread allowed by openmp
65

berenger-bramas's avatar
berenger-bramas committed
66 67
    const int nbProcess;                //< Number of process
    const int idPorcess;                //< Id of current process
berenger-bramas's avatar
berenger-bramas committed
68

69
    const static int BufferSize = 2000;      //< To know max of the buffer we receive
70
    FBufferVector<BufferSize> * sendBuffer;  //< To put data to send into a buffer
71

berenger-bramas's avatar
berenger-bramas committed
72 73 74
    /** To swap between two arrays
      * the current and the previous
      */
75 76 77 78 79 80 81 82 83 84 85 86
    void swapArray(){
        OctreeIterator* const temp = iterArray;
        iterArray = previousIterArray;
        previousIterArray = temp;
    }

public:
    /** The constructor need the octree and the kernels used for computation
      * @param inTree the octree to work on
      * @param inKernels the kernels to call
      * An assert is launched if one of the arguments is null
      */
87 88
    FFmmAlgorithmThreadProc(FMpi& inApp, Octree* const inTree, Kernel* const inKernels)
        : app(inApp), tree(inTree) , kernels(0), iterArray(0),
berenger-bramas's avatar
berenger-bramas committed
89
        previousIterArray(0), leafLeft(0),leafRight(0), numberOfLeafs(0),
90 91
        MaxThreads(omp_get_max_threads()), nbProcess(inApp.processCount()), idPorcess(inApp.processId()),
        sendBuffer(0) {
92 93 94

        assert(tree, "tree cannot be null", __LINE__, __FILE__);

berenger-bramas's avatar
berenger-bramas committed
95 96
        this->kernels = new Kernel*[MaxThreads];
        for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
97 98 99
            this->kernels[idxThread] = new KernelClass<ParticleClass, CellClass, OctreeHeight>(*inKernels);
        }

berenger-bramas's avatar
berenger-bramas committed
100
        this->sendBuffer = new FBufferVector<BufferSize>[nbProcess];
101

102
        FDEBUG(FDebug::Controller << "FFmmAlgorithmThreadProc\n");
berenger-bramas's avatar
berenger-bramas committed
103
        FDEBUG(FDebug::Controller << "Max threads = "  << MaxThreads << ", Procs = " << app.processCount() << ".\n");
104 105 106 107
    }

    /** Default destructor */
    virtual ~FFmmAlgorithmThreadProc(){
berenger-bramas's avatar
berenger-bramas committed
108
        for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
109 110
            delete this->kernels[idxThread];
        }
berenger-bramas's avatar
berenger-bramas committed
111
        delete [] this->kernels;
112 113

        delete [] this->sendBuffer;
114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160
    }

    /**
      * To execute the fmm algorithm
      * Call this function to run the complete algorithm
      */
    void execute(){
        FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );

        // Count leaf
        int leafs = 0;
        OctreeIterator octreeIterator(tree);
        octreeIterator.gotoBottomLeft();
        do{
            ++leafs;
        } while(octreeIterator.moveRight());

        iterArray = new OctreeIterator[leafs];
        assert(iterArray, "iterArray bad alloc", __LINE__, __FILE__);

        previousIterArray = new OctreeIterator[leafs];
        assert(previousIterArray, "previousIterArray bad alloc", __LINE__, __FILE__);

        // init offsets
        for(int idxOff = 0 ; idxOff < OctreeHeight ; ++idxOff){
            leftOffsets[idxOff] = 0;
            rightOffsets[idxOff] = 0;
        }

        // run
        bottomPass();

        upwardPass();

        downardPass();

        directPass();

        // delete array
        delete [] iterArray;
        iterArray = 0;
        delete [] previousIterArray;
        previousIterArray = 0;

        FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
    }

berenger-bramas's avatar
berenger-bramas committed
161 162 163 164
    /////////////////////////////////////////////////////////////////////////////
    // Utils functions
    /////////////////////////////////////////////////////////////////////////////

berenger-bramas's avatar
berenger-bramas committed
165 166 167
    int getLeft(const int inSize) const {
        const float step = (float(inSize) / nbProcess);
        return int(FMath::Ceil(step * idPorcess));
berenger-bramas's avatar
berenger-bramas committed
168 169
    }

berenger-bramas's avatar
berenger-bramas committed
170 171 172
    int getRight(const int inSize) const {
        const float step = (float(inSize) / nbProcess);
        const int res = int(FMath::Ceil(step * (idPorcess+1)));
berenger-bramas's avatar
berenger-bramas committed
173 174 175 176
        if(res > inSize) return inSize;
        else return res;
    }

berenger-bramas's avatar
berenger-bramas committed
177 178 179 180 181 182 183
    int getOtherRight(const int inSize,const int other) const {
        const float step = (float(inSize) / nbProcess);
        const int res = int(FMath::Ceil(step * (other+1)));
        if(res > inSize) return inSize;
        else return res;
    }

berenger-bramas's avatar
berenger-bramas committed
184 185
    int getProc(const int position, const int inSize) const {
        const float step = (float(inSize) / nbProcess);
berenger-bramas's avatar
berenger-bramas committed
186 187 188
        return int(position/step);
    }

berenger-bramas's avatar
berenger-bramas committed
189 190 191 192
    /////////////////////////////////////////////////////////////////////////////
    // P2M
    /////////////////////////////////////////////////////////////////////////////

193 194 195 196
    /** P2M */
    void bottomPass(){
        FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
        FDEBUG( FDebug::Controller.write("\tStart Bottom Pass\n").write(FDebug::Flush) );
berenger-bramas's avatar
berenger-bramas committed
197
        FDEBUG(FTic counterTime);
198 199

        OctreeIterator octreeIterator(tree);
berenger-bramas's avatar
berenger-bramas committed
200

201 202 203 204 205 206 207 208 209
        int leafs = 0;

        // Iterate on leafs
        octreeIterator.gotoBottomLeft();
        do{
            iterArray[leafs] = octreeIterator;
            ++leafs;
        } while(octreeIterator.moveRight());

berenger-bramas's avatar
berenger-bramas committed
210 211
        const int startIdx = getLeft(leafs);
        const int endIdx = getRight(leafs);
212

berenger-bramas's avatar
berenger-bramas committed
213
        FDEBUG(FTic computationCounter);
berenger-bramas's avatar
berenger-bramas committed
214
        #pragma omp parallel
215 216
        {
            Kernel * const myThreadkernels = kernels[omp_get_thread_num()];
berenger-bramas's avatar
berenger-bramas committed
217
            #pragma omp for nowait
218 219 220 221 222 223 224 225 226
            for(int idxLeafs = startIdx ; idxLeafs < endIdx ; ++idxLeafs){
                // We need the current cell that represent the leaf
                // and the list of particles
                myThreadkernels->P2M( iterArray[idxLeafs].getCurrentCell() , iterArray[idxLeafs].getCurrentListSrc());
            }
        }
        FDEBUG(computationCounter.tac());

        swapArray();
berenger-bramas's avatar
berenger-bramas committed
227 228 229
        this->leafLeft = startIdx;
        this->leafRight = endIdx - 1;
        this->numberOfLeafs = leafs;
berenger-bramas's avatar
berenger-bramas committed
230 231

        FDEBUG( FDebug::Controller << "\tFinished ("  << counterTime.tacAndElapsed() << "s)\n" );
232 233 234 235
        FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.elapsed() << " s\n" );
        FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
    }

berenger-bramas's avatar
berenger-bramas committed
236 237 238 239
    /////////////////////////////////////////////////////////////////////////////
    // Upward
    /////////////////////////////////////////////////////////////////////////////

240 241 242 243
    /** M2M */
    void upwardPass(){
        FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
        FDEBUG( FDebug::Controller.write("\tStart Upward Pass\n").write(FDebug::Flush); );
berenger-bramas's avatar
berenger-bramas committed
244 245 246 247
        FDEBUG(FTic counterTime);
        FDEBUG(FTic computationCounter);
        FDEBUG(FTic sendCounter);
        FDEBUG(FTic receiveCounter);
248 249 250 251 252 253 254

        // Start from leal level - 1
        OctreeIterator octreeIterator(tree);
        octreeIterator.gotoBottomLeft();
        octreeIterator.moveUp();
        OctreeIterator avoidGotoLeftIterator(octreeIterator);

berenger-bramas's avatar
berenger-bramas committed
255 256 257 258
        int previousLeft = this->leafLeft;
        int previousRight = this->leafRight;
        int previousSize = this->numberOfLeafs;

259 260
        // for each levels
        for(int idxLevel = OctreeHeight - 2 ; idxLevel > 1 ; --idxLevel ){
berenger-bramas's avatar
berenger-bramas committed
261

berenger-bramas's avatar
berenger-bramas committed
262
            int numberOfCells = 0;
263 264
            // for each cells
            do{
berenger-bramas's avatar
berenger-bramas committed
265 266
                iterArray[numberOfCells] = octreeIterator;
                ++numberOfCells;
267 268 269 270
            } while(octreeIterator.moveRight());
            avoidGotoLeftIterator.moveUp();
            octreeIterator = avoidGotoLeftIterator;// equal octreeIterator.moveUp(); octreeIterator.gotoLeft();

berenger-bramas's avatar
berenger-bramas committed
271 272
            const int startIdx = getLeft(numberOfCells);
            const int endIdx = getRight(numberOfCells);
273

berenger-bramas's avatar
berenger-bramas committed
274
            if(startIdx < numberOfCells){
berenger-bramas's avatar
berenger-bramas committed
275
                FDEBUG(sendCounter.tic());
berenger-bramas's avatar
berenger-bramas committed
276 277 278 279
                int leftOffset = 0;
                {
                    const MortonIndex MostLeftChild = iterArray[startIdx].getCurrentGlobalIndex() << 3;
                    const MortonIndex leftChildIter = previousIterArray[previousLeft].getCurrentGlobalIndex();
berenger-bramas's avatar
berenger-bramas committed
280

berenger-bramas's avatar
berenger-bramas committed
281 282 283 284 285 286
                    if(leftChildIter < MostLeftChild){
                        int parentOffset = startIdx - 1;
                        MortonIndex parentIndex = iterArray[parentOffset].getCurrentGlobalIndex();
                        MortonIndex childIndex = 0;
                        while( (childIndex = previousIterArray[previousLeft+leftOffset].getCurrentGlobalIndex()) < MostLeftChild){
                            childIndex >>= 3;
berenger-bramas's avatar
berenger-bramas committed
287

berenger-bramas's avatar
berenger-bramas committed
288 289 290
                            while(childIndex != parentIndex){
                                if(childIndex < parentIndex) --parentOffset;
                                else ++parentOffset;
berenger-bramas's avatar
berenger-bramas committed
291

berenger-bramas's avatar
berenger-bramas committed
292
                                parentIndex = iterArray[parentOffset].getCurrentGlobalIndex();
berenger-bramas's avatar
berenger-bramas committed
293

berenger-bramas's avatar
berenger-bramas committed
294
                            }
berenger-bramas's avatar
berenger-bramas committed
295

berenger-bramas's avatar
berenger-bramas committed
296 297
                            const int idxReceiver = getProc(parentOffset,numberOfCells);
                            if(!this->sendBuffer[idxReceiver].addEnoughSpace(previousIterArray[previousLeft+leftOffset].getCurrentCell()->bytesToSendUp())){
298 299 300
                                app.sendData(idxReceiver,this->sendBuffer[idxReceiver].getSize(),this->sendBuffer[idxReceiver].getData(),idxLevel);
                                this->sendBuffer[idxReceiver].clear();
                            }
berenger-bramas's avatar
berenger-bramas committed
301
                            this->sendBuffer[idxReceiver].addDataUp(previousLeft+leftOffset,*previousIterArray[previousLeft+leftOffset].getCurrentCell());
berenger-bramas's avatar
berenger-bramas committed
302

berenger-bramas's avatar
berenger-bramas committed
303 304
                            ++leftOffset;
                        }
305
                    }
berenger-bramas's avatar
berenger-bramas committed
306
                    else if(previousLeft > 0 && leftChildIter > MostLeftChild){
berenger-bramas's avatar
berenger-bramas committed
307 308 309
                        while( previousIterArray[previousLeft+leftOffset - 1].getCurrentGlobalIndex() >= MostLeftChild){
                            --leftOffset;
                        }
310
                    }
berenger-bramas's avatar
berenger-bramas committed
311
                    leftOffsets[idxLevel+1] = leftOffset;
312 313
                }

berenger-bramas's avatar
berenger-bramas committed
314
                int rightOffset = 0;
315
                {
berenger-bramas's avatar
berenger-bramas committed
316 317
                    const MortonIndex MostRightChild = (iterArray[endIdx-1].getCurrentGlobalIndex() << 3) | 7;
                    const MortonIndex rightChildIter = previousIterArray[previousRight].getCurrentGlobalIndex();
berenger-bramas's avatar
berenger-bramas committed
318

berenger-bramas's avatar
berenger-bramas committed
319
                    if(previousRight < previousSize - 1 && rightChildIter < MostRightChild){
berenger-bramas's avatar
berenger-bramas committed
320 321 322
                        while( previousIterArray[previousRight-rightOffset+1].getCurrentGlobalIndex() <= MostRightChild){
                            --rightOffset;
                        }
323
                    }
berenger-bramas's avatar
berenger-bramas committed
324 325 326 327 328 329 330 331 332 333 334
                    else if(rightChildIter > MostRightChild){
                        int parentOffset = endIdx;
                        MortonIndex parentIndex = iterArray[parentOffset].getCurrentGlobalIndex();
                        MortonIndex childIndex = 0;
                        while( (childIndex = previousIterArray[previousRight-rightOffset].getCurrentGlobalIndex()) > MostRightChild){
                            childIndex >>= 3;
                            while(childIndex != parentIndex){
                                if(childIndex < parentIndex) --parentOffset;
                                else ++parentOffset;
                                parentIndex = iterArray[parentOffset].getCurrentGlobalIndex();
                            }
berenger-bramas's avatar
berenger-bramas committed
335 336
                            const int idxReceiver = getProc(parentOffset,numberOfCells);
                            if(!this->sendBuffer[idxReceiver].addEnoughSpace(previousIterArray[previousRight-rightOffset].getCurrentCell()->bytesToSendUp())){
337 338 339
                                app.sendData(idxReceiver,this->sendBuffer[idxReceiver].getSize(),this->sendBuffer[idxReceiver].getData(),idxLevel);
                                this->sendBuffer[idxReceiver].clear();
                            }
berenger-bramas's avatar
berenger-bramas committed
340
                            this->sendBuffer[idxReceiver].addDataUp(previousRight-rightOffset,*previousIterArray[previousRight-rightOffset].getCurrentCell());
berenger-bramas's avatar
berenger-bramas committed
341

berenger-bramas's avatar
berenger-bramas committed
342 343
                            ++rightOffset;
                        }
344
                    }
berenger-bramas's avatar
berenger-bramas committed
345
                    rightOffsets[idxLevel+1] = rightOffset;
346
                }
berenger-bramas's avatar
berenger-bramas committed
347
                FDEBUG(sendCounter.tac());
berenger-bramas's avatar
berenger-bramas committed
348

berenger-bramas's avatar
berenger-bramas committed
349
                #pragma omp parallel
350
                {
351 352 353 354 355 356 357 358 359 360 361 362 363
                    // send computed data
                    #pragma omp single nowait
                    {
                        if(rightOffset > 0 || leftOffset > 0){
                            for(int idxProc = 0 ; idxProc < this->nbProcess ; ++idxProc){
                                if(this->sendBuffer[idxProc].getSize()){
                                    app.sendData(idxProc,this->sendBuffer[idxProc].getSize(),this->sendBuffer[idxProc].getData(),idxLevel);
                                    this->sendBuffer[idxProc].clear();
                                }
                            }
                        }
                    }

berenger-bramas's avatar
berenger-bramas committed
364
                    // received computed data
berenger-bramas's avatar
berenger-bramas committed
365
                    #pragma omp single
berenger-bramas's avatar
berenger-bramas committed
366
                    {
berenger-bramas's avatar
berenger-bramas committed
367
                        FDEBUG(receiveCounter.tic());
berenger-bramas's avatar
berenger-bramas committed
368
                        int needToReceive = FMath::Max(0,-rightOffset) + FMath::Max(0,-leftOffset);
369

370 371 372
                        int source = 0, filled = 0;
                        int position;
                        char buffer[BufferSize];
373

berenger-bramas's avatar
berenger-bramas committed
374
                        while(needToReceive){
375 376 377 378 379 380
                            app.receiveData( BufferSize, idxLevel, buffer, &source, &filled);
                            for(int idxBuff = 0 ; idxBuff < filled;){
                                memcpy(&position,&buffer[idxBuff],sizeof(int));
                                idxBuff += sizeof(int);
                                idxBuff += previousIterArray[position].getCurrentCell()->readUp(&buffer[idxBuff],filled-idxBuff);
                                --needToReceive;
berenger-bramas's avatar
berenger-bramas committed
381
                            }
382
                        }
berenger-bramas's avatar
berenger-bramas committed
383
                        FDEBUG(receiveCounter.tac());
384 385
                    }

berenger-bramas's avatar
berenger-bramas committed
386
                    #pragma omp single nowait
berenger-bramas's avatar
berenger-bramas committed
387 388 389 390 391
                    {
                        FDEBUG(computationCounter.tic());
                    }

                    Kernel * const myThreadkernels = kernels[omp_get_thread_num()];
berenger-bramas's avatar
berenger-bramas committed
392
                    #pragma omp for nowait
393 394
                    for(int idxCell = startIdx ; idxCell < endIdx ; ++idxCell){
                        myThreadkernels->M2M( iterArray[idxCell].getCurrentCell() , iterArray[idxCell].getCurrentChild(), idxLevel);
berenger-bramas's avatar
berenger-bramas committed
395 396
                    }

berenger-bramas's avatar
berenger-bramas committed
397
                    #pragma omp single nowait
berenger-bramas's avatar
berenger-bramas committed
398 399 400
                    {
                        FDEBUG(computationCounter.tac());
                    }
401 402

                }
berenger-bramas's avatar
berenger-bramas committed
403 404
            }
            else {
berenger-bramas's avatar
berenger-bramas committed
405
                int parentOffset = numberOfCells - 1;
berenger-bramas's avatar
berenger-bramas committed
406 407
                MortonIndex parentIndex = iterArray[parentOffset].getCurrentGlobalIndex();

408 409
                for(int idxCell = previousRight ; idxCell >= previousLeft ; --idxCell){
                    const MortonIndex childIndex = previousIterArray[idxCell].getCurrentGlobalIndex() >> 3;
berenger-bramas's avatar
berenger-bramas committed
410 411 412 413
                    while(childIndex != parentIndex){
                        --parentOffset;
                        parentIndex = iterArray[parentOffset].getCurrentGlobalIndex();
                    }
berenger-bramas's avatar
berenger-bramas committed
414
                    const int idxReceiver = getProc(parentOffset,numberOfCells);
415
                    if(!this->sendBuffer[idxReceiver].addEnoughSpace(previousIterArray[idxCell].getCurrentCell()->bytesToSendUp())){
416 417 418
                        app.sendData(idxReceiver,this->sendBuffer[idxReceiver].getSize(),this->sendBuffer[idxReceiver].getData(),idxLevel);
                        this->sendBuffer[idxReceiver].clear();
                    }
419
                    this->sendBuffer[idxReceiver].addDataUp(idxCell,*previousIterArray[idxCell].getCurrentCell());
420 421 422 423 424 425 426
                }

                for(int idxProc = 0 ; idxProc < this->nbProcess ; ++idxProc){
                    if(this->sendBuffer[idxProc].getSize()){
                        app.sendData(idxProc,this->sendBuffer[idxProc].getSize(),this->sendBuffer[idxProc].getData(),idxLevel);
                        this->sendBuffer[idxProc].clear();
                    }
427 428
                }

berenger-bramas's avatar
berenger-bramas committed
429
                leftOffsets[idxLevel+1] = (previousRight-previousLeft) + 1;
430 431 432
            }

            swapArray();
berenger-bramas's avatar
berenger-bramas committed
433 434 435
            previousLeft = startIdx;
            previousRight = endIdx - 1;
            previousSize = numberOfCells;
berenger-bramas's avatar
berenger-bramas committed
436

437 438
        }

439 440
        app.processBarrier();

berenger-bramas's avatar
berenger-bramas committed
441 442 443 444
        FDEBUG( FDebug::Controller << "\tFinished ("  << counterTime.tacAndElapsed() << "s)\n" );
        FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" );
        FDEBUG( FDebug::Controller << "\t\t Send : " << sendCounter.cumulated() << " s\n" );
        FDEBUG( FDebug::Controller << "\t\t Receive : " << receiveCounter.cumulated() << " s\n" );
445 446 447
        FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
    }

berenger-bramas's avatar
berenger-bramas committed
448 449 450 451
    /////////////////////////////////////////////////////////////////////////////
    // Downard
    /////////////////////////////////////////////////////////////////////////////

452 453 454 455 456
    /** M2L L2L */
    void downardPass(){
        FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );

        { // first M2L
berenger-bramas's avatar
berenger-bramas committed
457 458 459 460 461
            FDEBUG( FDebug::Controller.write("\tStart Downward Pass (M2L)\n").write(FDebug::Flush); );
            FDEBUG(FTic counterTime);
            FDEBUG(FTic computationCounter);
            FDEBUG(FTic sendCounter);
            FDEBUG(FTic receiveCounter);
berenger-bramas's avatar
berenger-bramas committed
462
            FDEBUG(FTic waitingToReceiveCounter);
berenger-bramas's avatar
berenger-bramas committed
463 464
            FDEBUG(FTic waitSendCounter);
            FDEBUG(FTic findCounter);
berenger-bramas's avatar
berenger-bramas committed
465

466 467 468 469
            OctreeIterator octreeIterator(tree);
            octreeIterator.moveDown();
            OctreeIterator avoidGotoLeftIterator(octreeIterator);

berenger-bramas's avatar
berenger-bramas committed
470 471 472
            FBoolArray* const indexToReceive = new FBoolArray(1 << (3*(OctreeHeight-1)));

            struct LimitCell { int counter;  CellClass* neighbors[208]; };
473
            LimitCell ** const unfinishedCells = new LimitCell*[this->leafRight - this->leafLeft + 1];
berenger-bramas's avatar
berenger-bramas committed
474 475 476 477

            FBoolArray* alreadySent[this->nbProcess];
            MortonIndex upperLimitForProc[this->nbProcess];
            for(int idxProc = 0 ; idxProc < nbProcess; ++idxProc){
478
                alreadySent[idxProc] = new FBoolArray(this->leafRight - this->leafLeft + 1);
berenger-bramas's avatar
berenger-bramas committed
479 480
            }

481 482
            // for each levels
            for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
berenger-bramas's avatar
berenger-bramas committed
483
                int numberOfCells = 0;
484 485
                // for each cells
                do{
berenger-bramas's avatar
berenger-bramas committed
486 487
                    iterArray[numberOfCells] = octreeIterator;
                    ++numberOfCells;
488 489 490 491
                } while(octreeIterator.moveRight());
                avoidGotoLeftIterator.moveDown();
                octreeIterator = avoidGotoLeftIterator;

berenger-bramas's avatar
berenger-bramas committed
492 493
                const int startIdx = getLeft(numberOfCells);
                const int endIdx = getRight(numberOfCells);
494 495 496 497 498

                // Get limit in term of morton index
                const MortonIndex startIdxIndex = iterArray[startIdx].getCurrentGlobalIndex();
                const MortonIndex endIdxIndex = iterArray[endIdx-1].getCurrentGlobalIndex();

berenger-bramas's avatar
berenger-bramas committed
499
                // reset arrays
500
                for(int idxProc = 0 ; idxProc < nbProcess; ++idxProc){
berenger-bramas's avatar
berenger-bramas committed
501 502
                    alreadySent[idxProc]->setToZeros();
                    upperLimitForProc[idxProc] = iterArray[getOtherRight(numberOfCells,idxProc)-1].getCurrentGlobalIndex();
503
                }
berenger-bramas's avatar
berenger-bramas committed
504 505
                indexToReceive->setToZeros();

506

berenger-bramas's avatar
berenger-bramas committed
507 508 509
                // to count missing data
                int needToReceive = 0;

berenger-bramas's avatar
berenger-bramas committed
510
                #pragma omp parallel
511 512 513 514 515
                {
                    CellClass* neighbors[208];
                    MortonIndex neighborsIndexes[208];
                    Kernel * const myThreadkernels = kernels[omp_get_thread_num()];

berenger-bramas's avatar
berenger-bramas committed
516
                    #pragma omp single nowait
berenger-bramas's avatar
berenger-bramas committed
517 518
                    {
                        FDEBUG(sendCounter.tic());
berenger-bramas's avatar
berenger-bramas committed
519
                        FDEBUG(computationCounter.tic());
berenger-bramas's avatar
berenger-bramas committed
520 521
                    }

522
                    #pragma omp for //schedule(dynamic)
523 524
                    for(int idxCell = startIdx ; idxCell < endIdx ; ++idxCell){
                        const int neighborsCounter = tree->getDistantNeighborsWithIndex(neighbors, neighborsIndexes, iterArray[idxCell].getCurrentGlobalIndex(),idxLevel);
525 526
                        bool needData = false;

berenger-bramas's avatar
berenger-bramas committed
527

528 529 530 531 532
                        for(int idxNeighbor = 0 ; idxNeighbor < neighborsCounter ; ++idxNeighbor){
                            // Get Morton index from this neigbor cell
                            const MortonIndex indexCell = neighborsIndexes[idxNeighbor];
                            // Is this cell out of our interval?
                            if(indexCell < startIdxIndex || endIdxIndex < indexCell){
berenger-bramas's avatar
berenger-bramas committed
533
                                FDEBUG(findCounter.tic());
534 535
                                // Yes we need to send the center of computation
                                // but who this cell belong to?
berenger-bramas's avatar
berenger-bramas committed
536 537
                                int idxReceiver = this->idPorcess;

538
                                if(indexCell < startIdxIndex){
berenger-bramas's avatar
berenger-bramas committed
539
                                    --idxReceiver;
540
                                    while(idxReceiver && indexCell <= upperLimitForProc[idxReceiver-1]){
berenger-bramas's avatar
berenger-bramas committed
541
                                        --idxReceiver;
542 543 544
                                    }
                                }
                                else {
berenger-bramas's avatar
berenger-bramas committed
545 546 547
                                    ++idxReceiver;
                                    while(upperLimitForProc[idxReceiver] < indexCell){
                                        ++idxReceiver;
548 549 550
                                    }
                                }

berenger-bramas's avatar
berenger-bramas committed
551 552 553
                                FDEBUG(findCounter.tac());

                                FDEBUG(computationCounter.tac());
berenger-bramas's avatar
berenger-bramas committed
554
                                #pragma omp critical(CheckToSend)
555
                                {
556 557
                                    if(!alreadySent[idxReceiver]->get(idxCell-startIdx)){
                                        alreadySent[idxReceiver]->set(idxCell-startIdx,true);
558
                                        needData = true;
559

560
                                        if(!this->sendBuffer[idxReceiver].addEnoughSpace(iterArray[idxCell].getCurrentCell()->bytesToSendUp())){
berenger-bramas's avatar
berenger-bramas committed
561
                                            FDEBUG(waitSendCounter.tic());
562
                                            app.sendData(idxReceiver,this->sendBuffer[idxReceiver].getSize(),this->sendBuffer[idxReceiver].getData(),idxLevel);
berenger-bramas's avatar
berenger-bramas committed
563
                                            FDEBUG(waitSendCounter.tac());
564 565
                                            this->sendBuffer[idxReceiver].clear();
                                        }
566
                                        this->sendBuffer[idxReceiver].addDataUp(idxCell,*iterArray[idxCell].getCurrentCell());
567 568
                                    }
                                }
berenger-bramas's avatar
berenger-bramas committed
569
                                #pragma omp critical(CheckToReceive)
570
                                {
berenger-bramas's avatar
berenger-bramas committed
571
                                    if(!indexToReceive->get(indexCell)){
572
                                        ++needToReceive;
berenger-bramas's avatar
berenger-bramas committed
573
                                        indexToReceive->set(indexCell,true);
574 575 576
                                        needData = true;
                                    }
                                }
berenger-bramas's avatar
berenger-bramas committed
577
                                FDEBUG(computationCounter.tic());
578 579
                            }
                        }
berenger-bramas's avatar
berenger-bramas committed
580
                        FDEBUG(computationCounter.tac());
berenger-bramas's avatar
berenger-bramas committed
581
                        // if need data we can not compute now
582
                        if(needData){
583
                            const int currentCell = idxCell - startIdx;
584 585 586
                            unfinishedCells[currentCell] = new LimitCell();
                            unfinishedCells[currentCell]->counter = neighborsCounter;
                            memcpy(unfinishedCells[currentCell]->neighbors,neighbors,sizeof(CellClass*)*neighborsCounter);
587
                            alreadySent[idPorcess]->set(idxCell-startIdx,true);
588
                        }
berenger-bramas's avatar
berenger-bramas committed
589
                        // we can compute now !
590
                        else if(neighborsCounter){
berenger-bramas's avatar
berenger-bramas committed
591
                            FDEBUG(computationCounter.tic());
592
                            myThreadkernels->M2L(  iterArray[idxCell].getCurrentCell() , neighbors, neighborsCounter, idxLevel);
berenger-bramas's avatar
berenger-bramas committed
593
                            FDEBUG(computationCounter.tac());
594
                        }
berenger-bramas's avatar
berenger-bramas committed
595
                        FDEBUG(computationCounter.tic());
596
                    }
berenger-bramas's avatar
berenger-bramas committed
597
                    FDEBUG(computationCounter.tac());
598

berenger-bramas's avatar
berenger-bramas committed
599
                    #pragma omp single nowait
berenger-bramas's avatar
berenger-bramas committed
600
                    {
berenger-bramas's avatar
berenger-bramas committed
601
                        FDEBUG(waitSendCounter.tic());
602 603 604 605 606 607
                        for(int idxProc = 0 ; idxProc < this->nbProcess ; ++idxProc){
                            if(this->sendBuffer[idxProc].getSize()){
                                app.sendData(idxProc,this->sendBuffer[idxProc].getSize(),this->sendBuffer[idxProc].getData(),idxLevel);
                                this->sendBuffer[idxProc].clear();
                            }
                        }
berenger-bramas's avatar
berenger-bramas committed
608
                        FDEBUG(waitSendCounter.tac());
609

berenger-bramas's avatar
berenger-bramas committed
610 611 612
                        FDEBUG(sendCounter.tac());
                    }

613
                    // received computed data
berenger-bramas's avatar
berenger-bramas committed
614
                    #pragma omp single
615
                    {
berenger-bramas's avatar
berenger-bramas committed
616
                        FDEBUG(receiveCounter.tic());
617

618 619 620

                        //FDEBUG( FDebug::Controller << "\t\tNeed to receive "  << needToReceive << " cells.\n" );

621 622 623
                        int source = 0, filled = 0;
                        int position;
                        char buffer[BufferSize];
624 625

                        while(needToReceive){
berenger-bramas's avatar
berenger-bramas committed
626
                            FDEBUG(waitingToReceiveCounter.tic());
627
                            app.receiveData( BufferSize, idxLevel, buffer, &source, &filled);
berenger-bramas's avatar
berenger-bramas committed
628
                            FDEBUG(waitingToReceiveCounter.tac());
629 630 631 632 633
                            for(int idxBuff = 0 ; idxBuff < filled;){
                                memcpy(&position,&buffer[idxBuff],sizeof(int));
                                idxBuff += sizeof(int);
                                idxBuff += iterArray[position].getCurrentCell()->readUp(&buffer[idxBuff],filled-idxBuff);
                                --needToReceive;
634 635
                            }
                        }
636

berenger-bramas's avatar
berenger-bramas committed
637
                        FDEBUG(receiveCounter.tac());
638 639
                    }

berenger-bramas's avatar
berenger-bramas committed
640
                    #pragma omp single nowait
berenger-bramas's avatar
berenger-bramas committed
641 642 643 644
                    {
                        FDEBUG(computationCounter.tic());
                    }

berenger-bramas's avatar
berenger-bramas committed
645
                    #pragma omp for nowait
646 647 648 649
                    for(int idxCell = startIdx ; idxCell < endIdx ; ++idxCell){
                        if(alreadySent[idPorcess]->get(idxCell-startIdx)){
                            myThreadkernels->M2L(  iterArray[idxCell].getCurrentCell() , unfinishedCells[idxCell-startIdx]->neighbors, unfinishedCells[idxCell-startIdx]->counter, idxLevel);
                            delete unfinishedCells[idxCell-startIdx];
650 651
                        }
                    }
berenger-bramas's avatar
berenger-bramas committed
652

berenger-bramas's avatar
berenger-bramas committed
653
                    #pragma omp single nowait
berenger-bramas's avatar
berenger-bramas committed
654 655 656
                    {
                        FDEBUG(computationCounter.tac());
                    }
657
                }
berenger-bramas's avatar
berenger-bramas committed
658
            }
659

660
            for(int idxProc = 0 ; idxProc < nbProcess; ++idxProc){
berenger-bramas's avatar
berenger-bramas committed
661
                delete alreadySent[idxProc];
662
            }
663

664
            delete [] unfinishedCells;
berenger-bramas's avatar
berenger-bramas committed
665
            delete indexToReceive;
666

667 668
            app.processBarrier();

berenger-bramas's avatar
berenger-bramas committed
669 670 671 672
            FDEBUG( FDebug::Controller << "\tFinished ("  << counterTime.tacAndElapsed() << "s)\n" );
            FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" );
            FDEBUG( FDebug::Controller << "\t\t Send : " << sendCounter.cumulated() << " s\n" );
            FDEBUG( FDebug::Controller << "\t\t Receive : " << receiveCounter.cumulated() << " s\n" );
berenger-bramas's avatar
berenger-bramas committed
673
            FDEBUG( FDebug::Controller << "\t\t Wait data to Receive : " << waitingToReceiveCounter.cumulated() << " s\n" );
berenger-bramas's avatar
berenger-bramas committed
674 675
            FDEBUG( FDebug::Controller << "\t\t\tTotal time to send in mpi "  << waitSendCounter.cumulated() << " s.\n" );
            FDEBUG( FDebug::Controller << "\t\t\tTotal time to find "  << findCounter.cumulated() << " s.\n" );
676
        }
berenger-bramas's avatar
berenger-bramas committed
677

678 679

        { // second L2L
berenger-bramas's avatar
berenger-bramas committed
680 681 682 683 684 685
            FDEBUG( FDebug::Controller.write("\tStart Downward Pass (L2L)\n").write(FDebug::Flush); );
            FDEBUG(FTic counterTime);
            FDEBUG(FTic computationCounter);
            FDEBUG(FTic sendCounter);
            FDEBUG(FTic receiveCounter);

686 687 688 689 690 691 692 693
            OctreeIterator octreeIterator(tree);
            octreeIterator.moveDown();

            OctreeIterator avoidGotoLeftIterator(octreeIterator);

            const int heightMinusOne = OctreeHeight - 1;

            // for each levels exepted leaf level
berenger-bramas's avatar
berenger-bramas committed
694
            for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
695

berenger-bramas's avatar
berenger-bramas committed
696
                int numberOfCells = 0;
berenger-bramas's avatar
berenger-bramas committed
697 698
                // for each cells
                do{
berenger-bramas's avatar
berenger-bramas committed
699 700
                    iterArray[numberOfCells] = octreeIterator;
                    ++numberOfCells;
berenger-bramas's avatar
berenger-bramas committed
701 702 703
                } while(octreeIterator.moveRight());
                avoidGotoLeftIterator.moveDown();
                octreeIterator = avoidGotoLeftIterator;
704

berenger-bramas's avatar
berenger-bramas committed
705 706
                const int startIdx = getLeft(numberOfCells);
                const int endIdx = getRight(numberOfCells);
707

berenger-bramas's avatar
berenger-bramas committed
708 709
                const int currentLeft = startIdx;
                const int currentRight = endIdx -1;
710

berenger-bramas's avatar
berenger-bramas committed
711
                #pragma omp parallel
712 713
                {
                    // send computed data
berenger-bramas's avatar
berenger-bramas committed
714
                    #pragma omp single nowait
715
                    {
berenger-bramas's avatar
berenger-bramas committed
716
                        FDEBUG(sendCounter.tic());
717
                        const int leftOffset = -leftOffsets[idxLevel];
718 719 720
                        for(int idxCell = 1 ; idxCell <= leftOffset ; ++idxCell){
                            const int idxReceiver = getProc((currentLeft-idxCell),numberOfCells);
                            if(!this->sendBuffer[idxReceiver].addEnoughSpace(iterArray[currentLeft-idxCell].getCurrentCell()->bytesToSendDown())){
721 722 723
                                app.sendData(idxReceiver,this->sendBuffer[idxReceiver].getSize(),this->sendBuffer[idxReceiver].getData(),idxLevel);
                                this->sendBuffer[idxReceiver].clear();
                            }
724
                            this->sendBuffer[idxReceiver].addDataDown(currentLeft-idxCell,*iterArray[currentLeft-idxCell].getCurrentCell());
725 726
                        }
                        const int rightOffset = -rightOffsets[idxLevel];
727 728 729
                        for(int idxCell = 1 ; idxCell <= rightOffset ; ++idxCell){
                            const int idxReceiver = getProc((currentRight+idxCell),numberOfCells);
                            if(!this->sendBuffer[idxReceiver].addEnoughSpace(iterArray[currentRight+idxCell].getCurrentCell()->bytesToSendDown())){
730 731 732
                                app.sendData(idxReceiver,this->sendBuffer[idxReceiver].getSize(),this->sendBuffer[idxReceiver].getData(),idxLevel);
                                this->sendBuffer[idxReceiver].clear();
                            }
733
                            this->sendBuffer[idxReceiver].addDataDown(currentRight+idxCell,*iterArray[currentRight+idxCell].getCurrentCell());
734
                        }
735 736 737 738 739 740 741 742

                        for(int idxProc = 0 ; idxProc < this->nbProcess ; ++idxProc){
                            if(this->sendBuffer[idxProc].getSize()){
                                app.sendData(idxProc,this->sendBuffer[idxProc].getSize(),this->sendBuffer[idxProc].getData(),idxLevel);
                                this->sendBuffer[idxProc].clear();
                            }
                        }

berenger-bramas's avatar
berenger-bramas committed
743
                        FDEBUG(sendCounter.tac());
744 745 746
                    }

                    // received computed data
berenger-bramas's avatar
berenger-bramas committed
747
                    #pragma omp single
748
                    {
berenger-bramas's avatar
berenger-bramas committed
749
                        FDEBUG(receiveCounter.tic());
750
                        int needToReceive = FMath::Max(0,rightOffsets[idxLevel]) + FMath::Max(0,leftOffsets[idxLevel]);
751
                        //FDEBUG( FDebug::Controller << "\t\tNeed to receive "  << needToReceive << " cells.\n" );
752

753 754 755
                        int source = 0, filled = 0;
                        int position;
                        char buffer[BufferSize];
756 757

                        while(needToReceive){
758 759 760 761 762 763
                            app.receiveData( BufferSize, idxLevel, buffer, &source, &filled);
                            for(int idxBuff = 0 ; idxBuff < filled;){
                                memcpy(&position,&buffer[idxBuff],sizeof(int));
                                idxBuff += sizeof(int);
                                idxBuff += iterArray[position].getCurrentCell()->readDown(&buffer[idxBuff],filled-idxBuff);
                                --needToReceive;
764 765
                            }
                        }
766

berenger-bramas's avatar
berenger-bramas committed
767
                        FDEBUG(receiveCounter.tac());
768 769 770
                    }
                }

berenger-bramas's avatar
berenger-bramas committed
771 772
                if(idxLevel != heightMinusOne){
                    FDEBUG(computationCounter.tic());
berenger-bramas's avatar
berenger-bramas committed
773
                    #pragma omp parallel
774 775
                    {
                        Kernel * const myThreadkernels = kernels[omp_get_thread_num()];
berenger-bramas's avatar
berenger-bramas committed
776
                        #pragma omp for
777 778
                        for(int idxCell = startIdx ; idxCell < endIdx ; ++idxCell){
                            myThreadkernels->L2L( iterArray[idxCell].getCurrentCell() , iterArray[idxCell].getCurrentChild(), idxLevel);
779
                        }
780
                    }
berenger-bramas's avatar
berenger-bramas committed
781 782
                    FDEBUG(computationCounter.tac());
                }
783 784
            }

785 786
            app.processBarrier();

berenger-bramas's avatar
berenger-bramas committed
787 788 789 790 791
            FDEBUG( FDebug::Controller << "\tFinished ("  << counterTime.tacAndElapsed() << "s)\n" );
            FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" );
            FDEBUG( FDebug::Controller << "\t\t Send : " << sendCounter.cumulated() << " s\n" );
            FDEBUG( FDebug::Controller << "\t\t Receive : " << receiveCounter.cumulated() << " s\n" );
        }
berenger-bramas's avatar
berenger-bramas committed
792

793 794 795
        FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
    }

berenger-bramas's avatar
berenger-bramas committed
796 797 798 799
    /////////////////////////////////////////////////////////////////////////////
    // Direct
    /////////////////////////////////////////////////////////////////////////////

800 801 802 803
    /** P2P */
    void directPass(){
        FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
        FDEBUG( FDebug::Controller.write("\tStart Direct Pass\n").write(FDebug::Flush); );
berenger-bramas's avatar
berenger-bramas committed
804
        FDEBUG(FTic counterTime);
805 806

        const int LeafIndex = OctreeHeight - 1;
807
        int leafs = 0;
808 809 810
        {
            OctreeIterator octreeIterator(tree);
            octreeIterator.gotoBottomLeft();
berenger-bramas's avatar
berenger-bramas committed
811
            // for each cells
812
            do{
813 814
                iterArray[leafs] = octreeIterator;
                ++leafs;
815 816 817
            } while(octreeIterator.moveRight());
        }

berenger-bramas's avatar
berenger-bramas committed
818
        FDEBUG(FTic computationCounter);
819

820 821
        const int startIdx = getLeft(leafs);
        const int endIdx = getRight(leafs);
822

berenger-bramas's avatar
berenger-bramas committed
823
        #pragma omp parallel
824 825 826 827 828
        {
            Kernel * const myThreadkernels = kernels[omp_get_thread_num()];
            // There is a maximum of 26 neighbors
            FList<ParticleClass*>* neighbors[26];

berenger-bramas's avatar
berenger-bramas committed
829
            #pragma omp for
830 831 832 833 834 835 836 837 838
            for(int idxLeafs = startIdx ; idxLeafs < endIdx ; ++idxLeafs){
                myThreadkernels->L2P(iterArray[idxLeafs].getCurrentCell(), iterArray[idxLeafs].getCurrentListTargets());
                // need the current particles and neighbors particles
                const int counter = tree->getLeafsNeighbors(neighbors, iterArray[idxLeafs].getCurrentGlobalIndex(),LeafIndex);
                myThreadkernels->P2P( iterArray[idxLeafs].getCurrentListTargets(), iterArray[idxLeafs].getCurrentListSrc() , neighbors, counter);
            }
        }
        FDEBUG(computationCounter.tac());

berenger-bramas's avatar
berenger-bramas committed
839 840

        FDEBUG( FDebug::Controller << "\tFinished ("  << counterTime.tacAndElapsed() << "s)\n" );
841