FFmmAlgorithmTask.hpp 12.4 KB
Newer Older
1
// ===================================================================================
2 3 4 5 6 7 8 9
// Logiciel initial: ScalFmm Version 0.5
// Co-auteurs : Olivier Coulaud, Bérenger Bramas.
// Propriétaires : INRIA.
// Copyright © 2011-2012, diffusé sous les termes et conditions d’une licence propriétaire.
// Initial software: ScalFmm Version 0.5
// Co-authors: Olivier Coulaud, Bérenger Bramas.
// Owners: INRIA.
// Copyright © 2011-2012, spread under the terms and conditions of a proprietary license.
10
// ===================================================================================
11 12
#ifndef FFMMALGORITHMTASK_HPP
#define FFMMALGORITHMTASK_HPP
13

14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52

#include "../Utils/FGlobal.hpp"
#include "../Utils/FAssertable.hpp"
#include "../Utils/FDebug.hpp"
#include "../Utils/FTrace.hpp"
#include "../Utils/FTic.hpp"

#include "../Containers/FOctree.hpp"
#include "../Containers/FVector.hpp"


/**
* @author Berenger Bramas (berenger.bramas@inria.fr)
* @class FFmmAlgorithmTask
* @brief
* Please read the license
*
* This class is a basic FMM algorithm
* It just iterates on a tree and call the kernels with good arguments.
*
* Of course this class does not deallocate pointer given in arguements.
*/
template<class OctreeClass, class ParticleClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass>
class FFmmAlgorithmTask : protected FAssertable{

    OctreeClass* const tree;       //< The octree to work on
    KernelClass** kernels;    //< The kernels

    const int MaxThreads;

    const int OctreeHeight;

public:	
    /** The constructor need the octree and the kernels used for computation
      * @param inTree the octree to work on
      * @param inKernels the kernels to call
      * An assert is launched if one of the arguments is null
      */
    FFmmAlgorithmTask(OctreeClass* const inTree, KernelClass* const inKernels)
53 54
        : tree(inTree) , kernels(0),
          MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()) {
55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85

        fassert(tree, "tree cannot be null", __LINE__, __FILE__);
        fassert(inKernels, "kernels cannot be null", __LINE__, __FILE__);

        this->kernels = new KernelClass*[MaxThreads];
        for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
            this->kernels[idxThread] = new KernelClass(*inKernels);
        }

        FDEBUG(FDebug::Controller << "FFmmAlgorithmTask\n");
    }

    /** Default destructor */
    virtual ~FFmmAlgorithmTask(){
        for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
            delete this->kernels[idxThread];
        }
        delete [] this->kernels;
    }

    /**
      * To execute the fmm algorithm
      * Call this function to run the complete algorithm
      */
    void execute(){
        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) );

        bottomPass();

        upwardPass();

86 87
        transferPass();

88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103
        downardPass();

        directPass();         
    }

private:
    /////////////////////////////////////////////////////////////////////////////
    // P2M
    /////////////////////////////////////////////////////////////////////////////

    /** P2M */
    void bottomPass(){
        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) );
        FDEBUG( FDebug::Controller.write("\tStart Bottom Pass\n").write(FDebug::Flush) );
        FDEBUG(FTic counterTime);

104
#pragma omp parallel
105 106 107
        {
            KernelClass*const myThreadkernels = kernels[omp_get_thread_num()];

108
#pragma omp single nowait
109 110 111 112 113 114 115 116
            {
                typename OctreeClass::Iterator octreeIterator(tree);

                // Iterate on leafs
                octreeIterator.gotoBottomLeft();
                do{
                    // We need the current cell that represent the leaf
                    // and the list of particles
117
#pragma omp task
118 119 120 121 122 123
                    {
                        myThreadkernels->P2M( octreeIterator.getCurrentCell() , octreeIterator.getCurrentListSrc());
                    }

                } while(octreeIterator.moveRight());

124
#pragma omp taskwait
125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140
            }
        }

        FDEBUG( FDebug::Controller << "\tFinished (@Bottom Pass (P2M) = "  << counterTime.tacAndElapsed() << "s)\n" );
    }

    /////////////////////////////////////////////////////////////////////////////
    // Upward
    /////////////////////////////////////////////////////////////////////////////

    /** M2M */
    void upwardPass(){
        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) );
        FDEBUG( FDebug::Controller.write("\tStart Upward Pass\n").write(FDebug::Flush); );
        FDEBUG(FTic counterTime);

141
#pragma omp parallel
142 143 144
        {
            KernelClass*const myThreadkernels = kernels[omp_get_thread_num()];

145
#pragma omp single nowait
146 147 148 149 150 151 152 153 154 155 156 157 158 159
            {
                // Start from leal level - 1
                typename OctreeClass::Iterator octreeIterator(tree);
                octreeIterator.gotoBottomLeft();
                octreeIterator.moveUp();

                typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);

                // for each levels
                for(int idxLevel = OctreeHeight - 2 ; idxLevel > 1 ; --idxLevel ){
                    // for each cells
                    do{
                        // We need the current cell and the child
                        // child is an array (of 8 child) that may be null
160
#pragma omp task
161 162 163 164 165 166 167 168
                        {
                            myThreadkernels->M2M( octreeIterator.getCurrentCell() , octreeIterator.getCurrentChild(), idxLevel);
                        }
                    } while(octreeIterator.moveRight());

                    avoidGotoLeftIterator.moveUp();
                    octreeIterator = avoidGotoLeftIterator;// equal octreeIterator.moveUp(); octreeIterator.gotoLeft();

169
#pragma omp taskwait
170 171 172 173 174 175 176 177 178
                }
            }
        }


        FDEBUG( FDebug::Controller << "\tFinished (@Upward Pass (M2M) = "  << counterTime.tacAndElapsed() << "s)\n" );
    }

    /////////////////////////////////////////////////////////////////////////////
179
    // Transfer
180 181 182
    /////////////////////////////////////////////////////////////////////////////

    /** M2L L2L */
183
    void transferPass(){
184 185
        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) );

186 187 188 189 190
        FDEBUG( FDebug::Controller.write("\tStart Downward Pass (M2L)\n").write(FDebug::Flush); );
        FDEBUG(FTic counterTime);
#pragma omp parallel
        {
            KernelClass*const myThreadkernels = kernels[omp_get_thread_num()];
191
            const CellClass* neighbors[343];
192 193

#pragma omp single nowait
194
            {
195 196 197 198 199 200 201 202 203 204
                typename OctreeClass::Iterator octreeIterator(tree);
                octreeIterator.moveDown();

                typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);


                // for each levels
                for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
                    // for each cells
                    do{
205
                        const int counter = tree->getInteractionNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(), idxLevel);
206 207 208 209
                        if(counter){
#pragma omp task
                            {
                                myThreadkernels->M2L( octreeIterator.getCurrentCell() , neighbors, counter, idxLevel);
210
                            }
211 212
                        }
                    } while(octreeIterator.moveRight());
213

214 215
                    avoidGotoLeftIterator.moveDown();
                    octreeIterator = avoidGotoLeftIterator;
216

217
#pragma omp taskwait
218 219 220
                }
            }
        }
221 222 223 224 225 226
        FDEBUG( FDebug::Controller << "\tFinished (@Downward Pass (M2L) = "  << counterTime.tacAndElapsed() << "s)\n" );
    }

    /////////////////////////////////////////////////////////////////////////////
    // Downward
    /////////////////////////////////////////////////////////////////////////////
227

228 229 230 231
    void downardPass(){ // second L2L
        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) );
        FDEBUG( FDebug::Controller.write("\tStart Downward Pass (L2L)\n").write(FDebug::Flush); );
        FDEBUG(FTic counterTime);
232

233 234 235
#pragma omp parallel
        {
            KernelClass*const myThreadkernels = kernels[omp_get_thread_num()];
236

237 238 239 240
#pragma omp single nowait
            {
                typename OctreeClass::Iterator octreeIterator(tree);
                octreeIterator.moveDown();
241

242
                typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);
243

244 245 246 247 248 249 250 251 252
                const int heightMinusOne = OctreeHeight - 1;
                // for each levels exepted leaf level
                for(int idxLevel = 2 ; idxLevel < heightMinusOne ; ++idxLevel ){
                    // for each cells
                    do{
#pragma omp task
                        {
                            myThreadkernels->L2L( octreeIterator.getCurrentCell() , octreeIterator.getCurrentChild(), idxLevel);
                        }
253

254
                    } while(octreeIterator.moveRight());
255

256 257
                    avoidGotoLeftIterator.moveDown();
                    octreeIterator = avoidGotoLeftIterator;
258

259
#pragma omp taskwait
260 261 262 263
                }
            }
        }

264
        FDEBUG( FDebug::Controller << "\tFinished (@Downward Pass (L2L) = "  << counterTime.tacAndElapsed() << "s)\n" );
265 266
    }

267

268 269 270 271 272 273 274 275 276 277 278 279
    /////////////////////////////////////////////////////////////////////////////
    // Direct
    /////////////////////////////////////////////////////////////////////////////

    /** P2P */
    void directPass(){
        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Fmm" , __FILE__ , __LINE__) );
        FDEBUG( FDebug::Controller.write("\tStart Direct Pass\n").write(FDebug::Flush); );
        FDEBUG(FTic counterTime);

        const int heightMinusOne = OctreeHeight - 1;

berenger-bramas's avatar
berenger-bramas committed
280
        #pragma omp parallel
281 282 283
        {
            KernelClass*const myThreadkernels = kernels[omp_get_thread_num()];
            // There is a maximum of 26 neighbors
berenger-bramas's avatar
berenger-bramas committed
284
            ContainerClass* neighbors[27];
285

berenger-bramas's avatar
berenger-bramas committed
286
            #pragma omp single nowait
287 288
            {
                const int SizeShape = 3*3*3;
berenger-bramas's avatar
berenger-bramas committed
289
                FVector<typename OctreeClass::Iterator> shapes[SizeShape];
290 291 292 293 294 295

                typename OctreeClass::Iterator octreeIterator(tree);
                octreeIterator.gotoBottomLeft();

                // for each leafs
                do{
berenger-bramas's avatar
berenger-bramas committed
296
                    #pragma omp task
297 298 299 300 301 302 303 304
                    {
                        myThreadkernels->L2P(octreeIterator.getCurrentCell(), octreeIterator.getCurrentListTargets());
                    }

                    const FTreeCoordinate& coord = octreeIterator.getCurrentGlobalCoordinate();
                    const int shapePosition = (coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3);

                    if( shapePosition == 0){
berenger-bramas's avatar
berenger-bramas committed
305
                        #pragma omp task
306 307
                        {
                            // need the current particles and neighbors particles
308 309
                            const int counter = tree->getLeafsNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(),heightMinusOne);
                            myThreadkernels->P2P(octreeIterator.getCurrentGlobalIndex(),octreeIterator.getCurrentListTargets(), octreeIterator.getCurrentListSrc() , neighbors, counter);
310 311 312
                        }
                    }
                    else{
berenger-bramas's avatar
berenger-bramas committed
313
                        shapes[shapePosition].push(octreeIterator);
314 315 316 317
                    }

                } while(octreeIterator.moveRight());

berenger-bramas's avatar
berenger-bramas committed
318
                #pragma omp taskwait
319 320

                for( int idxShape = 1 ; idxShape < SizeShape ; ++idxShape){
berenger-bramas's avatar
berenger-bramas committed
321 322 323
                    int iterLeaf = shapes[idxShape].getSize();
                    while( iterLeaf-- ){
                        typename OctreeClass::Iterator toWork = shapes[idxShape][iterLeaf];
berenger-bramas's avatar
berenger-bramas committed
324
                        #pragma omp task
325
                        {
326
                            const int counter = tree->getLeafsNeighbors(neighbors, toWork.getCurrentGlobalCoordinate(),heightMinusOne);
berenger-bramas's avatar
berenger-bramas committed
327
                            myThreadkernels->P2P(toWork.getCurrentGlobalCoordinate(), toWork.getCurrentListTargets(), neighbors, counter);
328 329 330
                        }
                    }

berenger-bramas's avatar
berenger-bramas committed
331
                    #pragma omp taskwait
332 333 334 335 336 337 338 339 340 341 342 343 344
                }
            }
        }


        FDEBUG( FDebug::Controller << "\tFinished (@Direct Pass (L2P + P2P) = "  << counterTime.tacAndElapsed() << "s)\n" );
    }

};


#endif //FFMMALGORITHMTASK_HPP

345