FFmmAlgorithmSectionTask.hpp 14.3 KB
Newer Older
1
// ===================================================================================
2 3 4 5 6 7 8 9 10 11 12 13 14
// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Bérenger Bramas, Matthias Messner
// olivier.coulaud@inria.fr, berenger.bramas@inria.fr
// This software is a computer program whose purpose is to compute the FMM.
//
// This software is governed by the CeCILL-C and LGPL licenses and
// abiding by the rules of distribution of free software.  
// 
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public and CeCILL-C Licenses for more details.
// "http://www.cecill.info". 
// "http://www.gnu.org/licenses".
15
// ===================================================================================
16 17
#ifndef FFMMALGORITHMSECTIONTASK_HPP
#define FFMMALGORITHMSECTIONTASK_HPP
18 19 20


#include "../Utils/FGlobal.hpp"
BRAMAS Berenger's avatar
BRAMAS Berenger committed
21
#include "../Utils/FAssert.hpp"
BRAMAS Berenger's avatar
BRAMAS Berenger committed
22
#include "../Utils/FLog.hpp"
23

24 25 26 27 28
#include "../Utils/FTic.hpp"

#include "../Containers/FOctree.hpp"
#include "../Containers/FVector.hpp"

29
#include "FCoreCommon.hpp"
30 31 32 33 34 35 36 37 38 39 40 41

/**
* @author Berenger Bramas (berenger.bramas@inria.fr)
* @class FFmmAlgorithmSectionTask
* @brief
* Please read the license
*
* This class is a basic FMM algorithm
* It just iterates on a tree and call the kernels with good arguments.
*
* Of course this class does not deallocate pointer given in arguements.
*/
42
template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass>
43
class FFmmAlgorithmSectionTask : public FAbstractAlgorithm, public FAlgorithmTimers {
44 45 46 47 48 49 50 51

    OctreeClass* const tree;       //< The octree to work on
    KernelClass** kernels;    //< The kernels

    const int MaxThreads;

    const int OctreeHeight;

52
    const int leafLevelSeperationCriteria;
53 54 55 56 57 58
public:
    /** The constructor need the octree and the kernels used for computation
      * @param inTree the octree to work on
      * @param inKernels the kernels to call
      * An assert is launched if one of the arguments is null
      */
59
    FFmmAlgorithmSectionTask(OctreeClass* const inTree, KernelClass* const inKernels, const int inLeafLevelSeperationCriteria = 1)
60
        : tree(inTree) , kernels(0),
61
          MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()), leafLevelSeperationCriteria(inLeafLevelSeperationCriteria)
62 63
    {

BRAMAS Berenger's avatar
BRAMAS Berenger committed
64 65
        FAssertLF(tree, "tree cannot be null");
        FAssertLF(inKernels, "kernels cannot be null");
66 67

        this->kernels = new KernelClass*[MaxThreads];
68
        #pragma omp parallel for schedule(static)
69
        for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
70
            #pragma omp critical (InitFFmmAlgorithmSectionTask)
71 72 73
            {
                this->kernels[idxThread] = new KernelClass(*inKernels);
            }
74 75
        }

76 77
        FAbstractAlgorithm::setNbLevelsInTree(tree->getHeight());

BRAMAS Berenger's avatar
BRAMAS Berenger committed
78
        FLOG(FLog::Controller << "FFmmAlgorithmSectionTask (Max Thread " << omp_get_max_threads() << ")\n");
79 80 81 82 83 84 85 86 87 88
    }

    /** Default destructor */
    virtual ~FFmmAlgorithmSectionTask(){
        for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
            delete this->kernels[idxThread];
        }
        delete [] this->kernels;
    }

89
protected:
90 91 92 93
    /**
      * To execute the fmm algorithm
      * Call this function to run the complete algorithm
      */
94
    void executeCore(const unsigned operationsToProceed) override {
95 96 97 98 99 100 101

        #pragma omp parallel
        {
            #pragma omp sections
            {
                #pragma omp section
                {
102
                    Timers[P2MTimer].tic();
103
                    if(operationsToProceed & FFmmP2M) bottomPass();
104 105 106
                    Timers[P2MTimer].tac();
                    
                    Timers[M2MTimer].tic();
107
                    if(operationsToProceed & FFmmM2M) upwardPass();
108 109 110
                    Timers[M2MTimer].tac();
                    
                    Timers[M2LTimer].tic();
111
                    if(operationsToProceed & FFmmM2L) transferPass();
112 113 114
                    Timers[M2LTimer].tac();
                    
                    Timers[L2LTimer].tic();
115
                    if(operationsToProceed & FFmmL2L) downardPass();
116 117
                    Timers[L2LTimer].tac();
                    
118 119 120
                }
                #pragma omp section
                {
121 122 123
                    Timers[P2PTimer].tic();
                    if( operationsToProceed & FFmmP2P ) directPass();
                    Timers[P2PTimer].tac();
124 125 126 127
                }
            }
            #pragma omp single
            {
128
                Timers[L2PTimer].tic();
129
                if(operationsToProceed & FFmmL2P) L2PPass();
130
                Timers[L2PTimer].tac();
131 132
            }
        }
133
        Timers[NearTimer] = Timers[L2PTimer] + Timers[P2PTimer];
134 135 136 137 138 139 140 141
    }

    /////////////////////////////////////////////////////////////////////////////
    // P2M
    /////////////////////////////////////////////////////////////////////////////

    /** P2M */
    void bottomPass(){
BRAMAS Berenger's avatar
BRAMAS Berenger committed
142
        FLOG( FLog::Controller.write("\tStart Bottom Pass\n").write(FLog::Flush) );
BRAMAS Berenger's avatar
BRAMAS Berenger committed
143
        FLOG(FTic counterTime);
144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160

        typename OctreeClass::Iterator octreeIterator(tree);

        // Iterate on leafs
        octreeIterator.gotoBottomLeft();
        do{
            // We need the current cell that represent the leaf
            // and the list of particles
            #pragma omp task firstprivate(octreeIterator)
            {
                kernels[omp_get_thread_num()]->P2M( octreeIterator.getCurrentCell() , octreeIterator.getCurrentListSrc());
            }
        } while(octreeIterator.moveRight());

        #pragma omp taskwait


BRAMAS Berenger's avatar
BRAMAS Berenger committed
161
        FLOG( FLog::Controller << "\tFinished (@Bottom Pass (P2M) = "  << counterTime.tacAndElapsed() << "s)\n" );
162 163 164 165 166 167 168 169
    }

    /////////////////////////////////////////////////////////////////////////////
    // Upward
    /////////////////////////////////////////////////////////////////////////////

    /** M2M */
    void upwardPass(){
BRAMAS Berenger's avatar
BRAMAS Berenger committed
170
        FLOG( FLog::Controller.write("\tStart Upward Pass\n").write(FLog::Flush); );
BRAMAS Berenger's avatar
BRAMAS Berenger committed
171
        FLOG(FTic counterTime);
172 173 174 175 176 177

        // Start from leal level - 1
        typename OctreeClass::Iterator octreeIterator(tree);
        octreeIterator.gotoBottomLeft();
        octreeIterator.moveUp();

178
        for(int idxLevel = OctreeHeight - 2 ; idxLevel > FAbstractAlgorithm::lowerWorkingLevel-1 ; --idxLevel){
179 180 181
            octreeIterator.moveUp();
        }

182 183 184
        typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);

        // for each levels
185
        for(int idxLevel = FMath::Min(OctreeHeight - 2, FAbstractAlgorithm::lowerWorkingLevel - 1) ; idxLevel >= FAbstractAlgorithm::upperWorkingLevel ; --idxLevel ){
BRAMAS Berenger's avatar
BRAMAS Berenger committed
186
            FLOG(FTic counterTimeLevel);
187 188 189 190 191 192 193 194 195 196 197 198 199 200
            // for each cells
            do{
                // We need the current cell and the child
                // child is an array (of 8 child) that may be null
                #pragma omp task firstprivate(octreeIterator) shared(idxLevel)
                {
                    kernels[omp_get_thread_num()]->M2M( octreeIterator.getCurrentCell() , octreeIterator.getCurrentChild(), idxLevel);
                }
            } while(octreeIterator.moveRight());

            avoidGotoLeftIterator.moveUp();
            octreeIterator = avoidGotoLeftIterator;// equal octreeIterator.moveUp(); octreeIterator.gotoLeft();

            #pragma omp taskwait
BRAMAS Berenger's avatar
BRAMAS Berenger committed
201
            FLOG( FLog::Controller << "\t\t>> Level " << idxLevel << " = "  << counterTimeLevel.tacAndElapsed() << "s\n" );
202 203 204
        }


BRAMAS Berenger's avatar
BRAMAS Berenger committed
205
        FLOG( FLog::Controller << "\tFinished (@Upward Pass (M2M) = "  << counterTime.tacAndElapsed() << "s)\n" );
206 207 208 209 210 211 212 213
    }

    /////////////////////////////////////////////////////////////////////////////
    // Transfer
    /////////////////////////////////////////////////////////////////////////////

    /** M2L L2L */
    void transferPass(){
BRAMAS Berenger's avatar
BRAMAS Berenger committed
214
        FLOG( FLog::Controller.write("\tStart Downward Pass (M2L)\n").write(FLog::Flush); );
BRAMAS Berenger's avatar
BRAMAS Berenger committed
215
        FLOG(FTic counterTime);
216 217 218 219 220 221

        const CellClass* neighbors[343];

        typename OctreeClass::Iterator octreeIterator(tree);
        octreeIterator.moveDown();

222
        for(int idxLevel = 2 ; idxLevel < FAbstractAlgorithm::upperWorkingLevel ; ++idxLevel){
223 224 225
            octreeIterator.moveDown();
        }

226 227 228 229
        typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);


        // for each levels
230
        for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){
BRAMAS Berenger's avatar
BRAMAS Berenger committed
231
            FLOG(FTic counterTimeLevel);
232
            const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria);
233 234
            // for each cells
            do{
235
                const int counter = tree->getInteractionNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(), idxLevel, separationCriteria);
236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257
                if(counter){
                    #pragma omp task firstprivate(octreeIterator, neighbors, counter) shared(idxLevel)
                    {
                        kernels[omp_get_thread_num()]->M2L( octreeIterator.getCurrentCell() , neighbors, counter, idxLevel);
                    }
                }

            } while(octreeIterator.moveRight());

            avoidGotoLeftIterator.moveDown();
            octreeIterator = avoidGotoLeftIterator;

            #pragma omp taskwait

            for( int idxThread = 0 ; idxThread < omp_get_num_threads() ; ++idxThread){
                #pragma omp task
                {
                    kernels[idxThread]->finishedLevelM2L(idxLevel);
                }
            }

            #pragma omp taskwait
BRAMAS Berenger's avatar
BRAMAS Berenger committed
258
            FLOG( FLog::Controller << "\t\t>> Level " << idxLevel << " = "  << counterTimeLevel.tacAndElapsed() << "s\n" );
259 260
        }

BRAMAS Berenger's avatar
BRAMAS Berenger committed
261
        FLOG( FLog::Controller << "\tFinished (@Downward Pass (M2L) = "  << counterTime.tacAndElapsed() << "s)\n" );
262 263 264 265 266 267 268
    }

    /////////////////////////////////////////////////////////////////////////////
    // Downward
    /////////////////////////////////////////////////////////////////////////////

    void downardPass(){ // second L2L
BRAMAS Berenger's avatar
BRAMAS Berenger committed
269
        FLOG( FLog::Controller.write("\tStart Downward Pass (L2L)\n").write(FLog::Flush); );
BRAMAS Berenger's avatar
BRAMAS Berenger committed
270
        FLOG(FTic counterTime);
271 272 273 274

        typename OctreeClass::Iterator octreeIterator(tree);
        octreeIterator.moveDown();

275
        for(int idxLevel = 2 ; idxLevel < FAbstractAlgorithm::upperWorkingLevel ; ++idxLevel){
276 277 278
            octreeIterator.moveDown();
        }

279 280
        typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);

281
        const int heightMinusOne = FAbstractAlgorithm::lowerWorkingLevel - 1;
282
        // for each levels exepted leaf level
283
        for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < heightMinusOne ; ++idxLevel ){
BRAMAS Berenger's avatar
BRAMAS Berenger committed
284
            FLOG(FTic counterTimeLevel);
285 286 287 288 289 290 291 292 293 294 295 296 297
            // for each cells
            do{
                #pragma omp task firstprivate(octreeIterator) shared(idxLevel)
                {
                    kernels[omp_get_thread_num()]->L2L( octreeIterator.getCurrentCell() , octreeIterator.getCurrentChild(), idxLevel);
                }

            } while(octreeIterator.moveRight());

            avoidGotoLeftIterator.moveDown();
            octreeIterator = avoidGotoLeftIterator;

            #pragma omp taskwait
BRAMAS Berenger's avatar
BRAMAS Berenger committed
298
            FLOG( FLog::Controller << "\t\t>> Level " << idxLevel << " = "  << counterTimeLevel.tacAndElapsed() << "s\n" );
299 300
        }

BRAMAS Berenger's avatar
BRAMAS Berenger committed
301
        FLOG( FLog::Controller << "\tFinished (@Downward Pass (L2L) = "  << counterTime.tacAndElapsed() << "s)\n" );
302 303 304 305 306 307 308 309 310
    }


    /////////////////////////////////////////////////////////////////////////////
    // Direct
    /////////////////////////////////////////////////////////////////////////////

    /** P2P */
    void directPass(){
BRAMAS Berenger's avatar
BRAMAS Berenger committed
311
        FLOG( FLog::Controller.write("\tStart Direct Pass\n").write(FLog::Flush); );
BRAMAS Berenger's avatar
BRAMAS Berenger committed
312 313
        FLOG(FTic counterTime);
        FLOG(FTic computationCounter);
314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334

        const int heightMinusOne = OctreeHeight - 1;

        // There is a maximum of 26 neighbors
        ContainerClass* neighbors[27];

        const int SizeShape = 3*3*3;
        FVector<typename OctreeClass::Iterator> shapes[SizeShape];

        typename OctreeClass::Iterator octreeIterator(tree);
        octreeIterator.gotoBottomLeft();

        // for each leafs
        do{
            const FTreeCoordinate& coord = octreeIterator.getCurrentGlobalCoordinate();
            const int shapePosition = (coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3);

            shapes[shapePosition].push(octreeIterator);

        } while(octreeIterator.moveRight());

BRAMAS Berenger's avatar
BRAMAS Berenger committed
335
        FLOG( computationCounter.tic() );
336 337

        for( int idxShape = 0 ; idxShape < SizeShape ; ++idxShape){
338
            const FSize nbLeaf = shapes[idxShape].getSize();
339 340 341 342 343 344 345 346 347 348 349 350 351
            for(int iterLeaf = 0 ; iterLeaf < nbLeaf ; ++iterLeaf ){
                typename OctreeClass::Iterator toWork = shapes[idxShape][iterLeaf];
                #pragma omp task firstprivate(neighbors, toWork)
                {
                    const int counter = tree->getLeafsNeighbors(neighbors, toWork.getCurrentGlobalCoordinate(),heightMinusOne);
                    kernels[omp_get_thread_num()]->P2P(toWork.getCurrentGlobalCoordinate(), toWork.getCurrentListTargets(),
                                         toWork.getCurrentListSrc(), neighbors, counter);
                }
            }

            #pragma omp taskwait
        }

BRAMAS Berenger's avatar
BRAMAS Berenger committed
352
        FLOG( computationCounter.tac() );
353 354


BRAMAS Berenger's avatar
BRAMAS Berenger committed
355 356
        FLOG( FLog::Controller << "\tFinished (@Direct Pass (P2P) = "  << counterTime.tacAndElapsed() << "s)\n" );
        FLOG( FLog::Controller << "\t\t Computation P2P : " << computationCounter.cumulated() << " s\n" );
357 358 359
    }

    void L2PPass(){
BRAMAS Berenger's avatar
BRAMAS Berenger committed
360
        FLOG( FLog::Controller.write("\tStart L2P Pass\n").write(FLog::Flush); );
BRAMAS Berenger's avatar
BRAMAS Berenger committed
361
        FLOG(FTic counterTime);
362 363 364 365 366 367 368 369 370 371 372 373 374 375

        typename OctreeClass::Iterator octreeIterator(tree);
        octreeIterator.gotoBottomLeft();

        // for each leafs
        do{
            #pragma omp task firstprivate(octreeIterator)
            {
                kernels[omp_get_thread_num()]->L2P(octreeIterator.getCurrentCell(), octreeIterator.getCurrentListTargets());
            }
        } while(octreeIterator.moveRight());

        #pragma omp taskwait

BRAMAS Berenger's avatar
BRAMAS Berenger committed
376
        FLOG( FLog::Controller << "\tFinished (@Direct Pass (L2P) = "  << counterTime.tacAndElapsed() << "s)\n" );
377 378 379 380 381 382 383 384
    }

};


#endif //FFMMALGORITHMTASK_HPP