FGroupTaskStarpuAlgorithm.hpp 46.4 KB
Newer Older
1 2
// Keep in private GIT
// @SCALFMM_PRIVATE
BRAMAS Berenger's avatar
BRAMAS Berenger committed
3 4 5
#ifndef FGROUPTASKSTARPUALGORITHM_HPP
#define FGROUPTASKSTARPUALGORITHM_HPP

6 7 8 9 10 11 12
#include "../../Utils/FGlobal.hpp"
#include "../../Core/FCoreCommon.hpp"
#include "../../Utils/FQuickSort.hpp"
#include "../../Containers/FTreeCoordinate.hpp"
#include "../../Utils/FLog.hpp"
#include "../../Utils/FTic.hpp"
#include "../../Utils/FAssert.hpp"
BRAMAS Berenger's avatar
BRAMAS Berenger committed
13

14 15
#include "FOutOfBlockInteraction.hpp"

BRAMAS Berenger's avatar
BRAMAS Berenger committed
16 17 18 19 20 21
#include <vector>
#include <vector>

#include <omp.h>

#include <starpu.h>
22
#include "../StarPUUtils/FStarPUUtils.hpp"
23
#include "../StarPUUtils/FStarPUFmmPriorities.hpp"
BRAMAS Berenger's avatar
BRAMAS Berenger committed
24

25
#ifdef STARPU_USE_CPU
26
#include "../StarPUUtils/FStarPUCpuWrapper.hpp"
27
#endif
28
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
29 30 31 32 33
#include "../StarPUUtils/FStarPUCudaWrapper.hpp"
#include "../Cuda/FCudaEmptyKernel.hpp"
#include "../Cuda/FCudaGroupAttachedLeaf.hpp"
#include "../Cuda/FCudaGroupOfParticles.hpp"
#include "../Cuda/FCudaGroupOfCells.hpp"
34
#include "../Cuda/FCudaEmptyCellSymb.hpp"
35
#endif
36
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
37 38
#include "../StarPUUtils/FStarPUOpenClWrapper.hpp"
#include "../OpenCl/FOpenCLDeviceWrapper.hpp"
39 40
#endif

41

42
template <class OctreeClass, class CellContainerClass, class KernelClass, class ParticleGroupClass, class StarPUCpuWrapperClass
43 44 45 46 47 48 49
          #ifdef SCALFMM_ENABLE_CUDA_KERNEL
          , class StarPUCudaWrapperClass = FStarPUCudaWrapper<KernelClass, FCudaEmptyCellSymb, int, int, FCudaGroupOfCells<FCudaEmptyCellSymb, int, int>,
                                                              FCudaGroupOfParticles<int, 0, 0, int>, FCudaGroupAttachedLeaf<int, 0, 0, int>, FCudaEmptyKernel<int> >
          #endif
          #ifdef SCALFMM_ENABLE_OPENCL_KERNEL
          , class StarPUOpenClWrapperClass = FStarPUOpenClWrapper<KernelClass, FOpenCLDeviceWrapper<KernelClass>>
          #endif
50
          >
51
class FGroupTaskStarPUAlgorithm : public FAbstractAlgorithm {
BRAMAS Berenger's avatar
BRAMAS Berenger committed
52
protected:
53
    typedef FGroupTaskStarPUAlgorithm<OctreeClass, CellContainerClass, KernelClass, ParticleGroupClass, StarPUCpuWrapperClass
54
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
55
    , StarPUCudaWrapperClass
56
#endif
57
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
58
    , StarPUOpenClWrapperClass
59 60
#endif
    > ThisClass;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
61 62 63 64 65 66 67 68

    template <class OtherBlockClass>
    struct BlockInteractions{
        OtherBlockClass* otherBlock;
        int otherBlockId;
        std::vector<OutOfBlockInteraction> interactions;
    };

69 70 71 72
    struct CellHandles{
        starpu_data_handle_t symb;
        starpu_data_handle_t up;
        starpu_data_handle_t down;
73
        int intervalSize;
74 75 76 77 78
    };

    struct ParticleHandles{
        starpu_data_handle_t symb;
        starpu_data_handle_t down;
79
        int intervalSize;
80 81
    };

BRAMAS Berenger's avatar
BRAMAS Berenger committed
82 83 84 85
    std::vector< std::vector< std::vector<BlockInteractions<CellContainerClass>>>> externalInteractionsAllLevel;
    std::vector< std::vector<BlockInteractions<ParticleGroupClass>>> externalInteractionsLeafLevel;

    OctreeClass*const tree;       //< The Tree
86
    KernelClass*const originalCpuKernel;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
87

88 89
    std::vector<CellHandles>* cellHandles;
    std::vector<ParticleHandles> particleHandles;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
90 91 92 93 94 95 96 97 98 99 100 101

    starpu_codelet p2m_cl;
    starpu_codelet m2m_cl[9];
    starpu_codelet l2l_cl[9];
    starpu_codelet l2p_cl;

    starpu_codelet m2l_cl_in;
    starpu_codelet m2l_cl_inout;

    starpu_codelet p2p_cl_in;
    starpu_codelet p2p_cl_inout;

BRAMAS Berenger's avatar
BRAMAS Berenger committed
102
#ifdef STARPU_USE_CPU
103
    StarPUCpuWrapperClass cpuWrapper;
104
#endif
105
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
106 107
    StarPUCudaWrapperClass cudaWrapper;
#endif
108
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
109 110
    StarPUOpenClWrapperClass openclWrapper;
#endif
111 112 113

    FStarPUPtrInterface wrappers;
    FStarPUPtrInterface* wrapperptr;
114

115 116 117 118 119
#ifdef STARPU_SUPPORT_ARBITER
    starpu_arbiter_t arbiterPole;
    starpu_arbiter_t arbiterLocal;
    starpu_arbiter_t arbiterParticles;
#endif
BRAMAS Berenger's avatar
BRAMAS Berenger committed
120
public:
121 122
    FGroupTaskStarPUAlgorithm(OctreeClass*const inTree, KernelClass* inKernels)
        : tree(inTree), originalCpuKernel(inKernels),
123
          cellHandles(nullptr),
124 125 126 127 128 129 130 131 132 133
      #ifdef STARPU_USE_CPU
          cpuWrapper(tree->getHeight()),
      #endif
      #ifdef SCALFMM_ENABLE_CUDA_KERNEL
          cudaWrapper(tree->getHeight()),
      #endif
      #ifdef SCALFMM_ENABLE_OPENCL_KERNEL
          openclWrapper(tree->getHeight()),
      #endif
          wrapperptr(&wrappers){
BRAMAS Berenger's avatar
BRAMAS Berenger committed
134 135 136
        FAssertLF(tree, "tree cannot be null");
        FAssertLF(inKernels, "kernels cannot be null");

137 138
        FAbstractAlgorithm::setNbLevelsInTree(tree->getHeight());

139 140
        struct starpu_conf conf;
        FAssertLF(starpu_conf_init(&conf) == 0);
141
        FStarPUFmmPriorities::Controller().init(&conf, tree->getHeight(), inKernels);
142
        FAssertLF(starpu_init(&conf) == 0);
143 144 145

        starpu_pthread_mutex_t initMutex;
        starpu_pthread_mutex_init(&initMutex, NULL);
146
#ifdef STARPU_USE_CPU
147 148 149 150 151
        FStarPUUtils::ExecOnWorkers(STARPU_CPU, [&](){
            starpu_pthread_mutex_lock(&initMutex);
            cpuWrapper.initKernel(starpu_worker_get_id(), inKernels);
            starpu_pthread_mutex_unlock(&initMutex);
        });
152
        wrappers.set(FSTARPU_CPU_IDX, &cpuWrapper);
153
#endif
154
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
155 156 157 158 159 160 161
        FStarPUUtils::ExecOnWorkers(STARPU_CUDA, [&](){
            starpu_pthread_mutex_lock(&initMutex);
            cudaWrapper.initKernel(starpu_worker_get_id(), inKernels);
            starpu_pthread_mutex_unlock(&initMutex);
        });
        wrappers.set(FSTARPU_CUDA_IDX, &cudaWrapper);
#endif
162
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
163 164 165 166 167 168
        FStarPUUtils::ExecOnWorkers(STARPU_OPENCL, [&](){
            starpu_pthread_mutex_lock(&initMutex);
            openclWrapper.initKernel(starpu_worker_get_id(), inKernels);
            starpu_pthread_mutex_unlock(&initMutex);
        });
        wrappers.set(FSTARPU_OPENCL_IDX, &openclWrapper);
169
#endif
170 171
        starpu_pthread_mutex_destroy(&initMutex);

172 173
        starpu_pause();

174
        cellHandles   = new std::vector<CellHandles>[tree->getHeight()];
175

176
        initCodelet();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
177
        rebuildInteractions();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
178

179 180 181 182 183 184
#ifdef STARPU_SUPPORT_ARBITER
        arbiterPole = starpu_arbiter_create();
        arbiterLocal = starpu_arbiter_create();
        arbiterParticles = starpu_arbiter_create();
#endif

BRAMAS Berenger's avatar
BRAMAS Berenger committed
185
        FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max Worker " << starpu_worker_get_count() << ")\n");
BRAMAS Berenger's avatar
BRAMAS Berenger committed
186
#ifdef STARPU_USE_CPU
BRAMAS Berenger's avatar
BRAMAS Berenger committed
187
        FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max CPU " << starpu_cpu_worker_get_count() << ")\n");
BRAMAS Berenger's avatar
BRAMAS Berenger committed
188
#endif
189
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
BRAMAS Berenger's avatar
BRAMAS Berenger committed
190 191
        FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max OpenCL " << starpu_opencl_worker_get_count() << ")\n");
#endif
192
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
BRAMAS Berenger's avatar
BRAMAS Berenger committed
193 194
        FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max CUDA " << starpu_cuda_worker_get_count() << ")\n");
#endif
BRAMAS Berenger's avatar
BRAMAS Berenger committed
195 196 197
    }

    ~FGroupTaskStarPUAlgorithm(){
BRAMAS Berenger's avatar
BRAMAS Berenger committed
198 199
        starpu_resume();

BRAMAS Berenger's avatar
BRAMAS Berenger committed
200
        cleanHandle();
201
        delete[] cellHandles;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
202

BRAMAS Berenger's avatar
BRAMAS Berenger committed
203 204 205 206 207 208 209 210 211 212
        starpu_pthread_mutex_t releaseMutex;
        starpu_pthread_mutex_init(&releaseMutex, NULL);
#ifdef STARPU_USE_CPU
        FStarPUUtils::ExecOnWorkers(STARPU_CPU, [&](){
            starpu_pthread_mutex_lock(&releaseMutex);
            cpuWrapper.releaseKernel(starpu_worker_get_id());
            starpu_pthread_mutex_unlock(&releaseMutex);
        });
        wrappers.set(FSTARPU_CPU_IDX, &cpuWrapper);
#endif
213
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
BRAMAS Berenger's avatar
BRAMAS Berenger committed
214 215 216 217 218 219 220
        FStarPUUtils::ExecOnWorkers(STARPU_CUDA, [&](){
            starpu_pthread_mutex_lock(&releaseMutex);
            cudaWrapper.releaseKernel(starpu_worker_get_id());
            starpu_pthread_mutex_unlock(&releaseMutex);
        });
        wrappers.set(FSTARPU_CUDA_IDX, &cudaWrapper);
#endif
221
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
BRAMAS Berenger's avatar
BRAMAS Berenger committed
222 223 224 225 226 227 228 229 230
        FStarPUUtils::ExecOnWorkers(STARPU_OPENCL, [&](){
            starpu_pthread_mutex_lock(&releaseMutex);
            openclWrapper.releaseKernel(starpu_worker_get_id());
            starpu_pthread_mutex_unlock(&releaseMutex);
        });
        wrappers.set(FSTARPU_OPENCL_IDX, &openclWrapper);
#endif
        starpu_pthread_mutex_destroy(&releaseMutex);

231 232 233 234 235 236 237

#ifdef STARPU_SUPPORT_ARBITER
        starpu_arbiter_destroy(arbiterPole);
        starpu_arbiter_destroy(arbiterLocal);
        starpu_arbiter_destroy(arbiterParticles);
#endif

BRAMAS Berenger's avatar
BRAMAS Berenger committed
238 239 240
        starpu_shutdown();
    }

BRAMAS Berenger's avatar
BRAMAS Berenger committed
241 242 243 244 245 246 247 248
    void rebuildInteractions(){
        #pragma omp parallel
        #pragma omp single
        buildExternalInteractionVecs();

        buildHandles();
    }

249 250 251 252 253
protected:
    /**
      * Runs the complete algorithm.
      */
    void executeCore(const unsigned operationsToProceed) override {
BRAMAS Berenger's avatar
BRAMAS Berenger committed
254
        FLOG( FLog::Controller << "\tStart FGroupTaskStarPUAlgorithm\n" );
255
        const bool directOnly = (tree->getHeight() <= 2);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
256 257 258

        starpu_resume();

259
        if(operationsToProceed & FFmmP2M && !directOnly) bottomPass();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
260

261
        if(operationsToProceed & FFmmM2M && !directOnly) upwardPass();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
262

263
        if(operationsToProceed & FFmmM2L && !directOnly) transferPass();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
264

265
        if(operationsToProceed & FFmmL2L && !directOnly) downardPass();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
266

BRAMAS Berenger's avatar
BRAMAS Berenger committed
267 268
        if( operationsToProceed & FFmmP2P ) directPass();

269
        if( operationsToProceed & FFmmL2P && !directOnly) mergePass();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
270 271 272 273 274

        starpu_task_wait_for_all();
        starpu_pause();
    }

275

BRAMAS Berenger's avatar
BRAMAS Berenger committed
276 277
    void initCodelet(){
        memset(&p2m_cl, 0, sizeof(p2m_cl));
278
#ifdef STARPU_USE_CPU
279
        if(originalCpuKernel->supportP2M(FSTARPU_CPU_IDX)){
280 281 282
            p2m_cl.cpu_funcs[0] = StarPUCpuWrapperClass::bottomPassCallback;
            p2m_cl.where |= STARPU_CPU;
        }
283
#endif
284
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
285
        if(originalCpuKernel->supportP2M(FSTARPU_CUDA_IDX)){
286 287 288 289
            p2m_cl.cuda_funcs[0] = StarPUCudaWrapperClass::bottomPassCallback;
            p2m_cl.where |= STARPU_CUDA;
        }
#endif
290
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
291
        if(originalCpuKernel->supportP2M(FSTARPU_OPENCL_IDX)){
292 293 294
            p2m_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::bottomPassCallback;
            p2m_cl.where |= STARPU_OPENCL;
        }
295
#endif
296 297 298 299
        p2m_cl.nbuffers = 3;
        p2m_cl.modes[0] = STARPU_R;
        p2m_cl.modes[1] = STARPU_RW;
        p2m_cl.modes[2] = STARPU_R;
300
        p2m_cl.name = "p2m_cl";
BRAMAS Berenger's avatar
BRAMAS Berenger committed
301 302 303 304

        memset(m2m_cl, 0, sizeof(m2m_cl[0])*9);
        memset(l2l_cl, 0, sizeof(l2l_cl[0])*9);
        for(int idx = 0 ; idx < 9 ; ++idx){
305
#ifdef STARPU_USE_CPU
306
            if(originalCpuKernel->supportM2M(FSTARPU_CPU_IDX)){
307 308 309
                m2m_cl[idx].cpu_funcs[0] = StarPUCpuWrapperClass::upwardPassCallback;
                m2m_cl[idx].where |= STARPU_CPU;
            }
310
#endif
311
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
312
            if(originalCpuKernel->supportM2M(FSTARPU_CUDA_IDX)){
313 314 315 316
                m2m_cl[idx].cuda_funcs[0] = StarPUCudaWrapperClass::upwardPassCallback;
                m2m_cl[idx].where |= STARPU_CUDA;
            }
#endif
317
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
318
            if(originalCpuKernel->supportM2M(FSTARPU_OPENCL_IDX)){
319 320 321
                m2m_cl[idx].opencl_funcs[0] = StarPUOpenClWrapperClass::upwardPassCallback;
                m2m_cl[idx].where |= STARPU_OPENCL;
            }
322
#endif
323 324 325 326
            m2m_cl[idx].nbuffers = (idx+2)*2;
            m2m_cl[idx].dyn_modes = (starpu_data_access_mode*)malloc(m2m_cl[idx].nbuffers*sizeof(starpu_data_access_mode));
            m2m_cl[idx].dyn_modes[0] = STARPU_R;
            m2m_cl[idx].dyn_modes[1] = STARPU_RW;
327
            m2m_cl[idx].name = "m2m_cl";
BRAMAS Berenger's avatar
BRAMAS Berenger committed
328

329
#ifdef STARPU_USE_CPU
330
            if(originalCpuKernel->supportL2L(FSTARPU_CPU_IDX)){
331 332 333
                l2l_cl[idx].cpu_funcs[0] = StarPUCpuWrapperClass::downardPassCallback;
                l2l_cl[idx].where |= STARPU_CPU;
            }
334
#endif
335
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
336
            if(originalCpuKernel->supportL2L(FSTARPU_CUDA_IDX)){
337 338 339 340
                l2l_cl[idx].cuda_funcs[0] = StarPUCudaWrapperClass::downardPassCallback;
                l2l_cl[idx].where |= STARPU_CUDA;
            }
#endif
341
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
342
            if(originalCpuKernel->supportL2L(FSTARPU_OPENCL_IDX)){
343 344 345
                l2l_cl[idx].opencl_funcs[0] = StarPUOpenClWrapperClass::downardPassCallback;
                l2l_cl[idx].where |= STARPU_OPENCL;
            }
346
#endif
347 348
            l2l_cl[idx].nbuffers = (idx+2)*2;
            l2l_cl[idx].dyn_modes = (starpu_data_access_mode*)malloc(l2l_cl[idx].nbuffers*sizeof(starpu_data_access_mode));
BRAMAS Berenger's avatar
BRAMAS Berenger committed
349
            l2l_cl[idx].dyn_modes[0] = STARPU_R;
350
            l2l_cl[idx].dyn_modes[1] = STARPU_R;
351
            l2l_cl[idx].name = "l2l_cl";
BRAMAS Berenger's avatar
BRAMAS Berenger committed
352 353

            for(int idxBuffer = 0 ; idxBuffer <= idx ; ++idxBuffer){
354 355 356
                m2m_cl[idx].dyn_modes[(idxBuffer*2)+2] = STARPU_R;
                m2m_cl[idx].dyn_modes[(idxBuffer*2)+3] = STARPU_R;
                l2l_cl[idx].dyn_modes[(idxBuffer*2)+2] = STARPU_R;
357
                l2l_cl[idx].dyn_modes[(idxBuffer*2)+3] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
358 359 360 361
            }
        }

        memset(&l2p_cl, 0, sizeof(l2p_cl));
362
#ifdef STARPU_USE_CPU
363
        if(originalCpuKernel->supportL2P(FSTARPU_CPU_IDX)){
364 365 366
            l2p_cl.cpu_funcs[0] = StarPUCpuWrapperClass::mergePassCallback;
            l2p_cl.where |= STARPU_CPU;
        }
367
#endif
368
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
369
        if(originalCpuKernel->supportL2P(FSTARPU_CUDA_IDX)){
370 371 372 373
            l2p_cl.cuda_funcs[0] = StarPUCudaWrapperClass::mergePassCallback;
            l2p_cl.where |= STARPU_CUDA;
        }
#endif
374
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
375
        if(originalCpuKernel->supportL2P(FSTARPU_OPENCL_IDX)){
376 377 378
            l2p_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::mergePassCallback;
            l2p_cl.where |= STARPU_OPENCL;
        }
379
#endif
380
        l2p_cl.nbuffers = 4;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
381
        l2p_cl.modes[0] = STARPU_R;
382 383
        l2p_cl.modes[1] = STARPU_R;
        l2p_cl.modes[2] = STARPU_R;
384
        l2p_cl.modes[3] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
385
        l2p_cl.name = "l2p_cl";
BRAMAS Berenger's avatar
BRAMAS Berenger committed
386 387

        memset(&p2p_cl_in, 0, sizeof(p2p_cl_in));
388
#ifdef STARPU_USE_CPU
389
        if(originalCpuKernel->supportP2P(FSTARPU_CPU_IDX)){
390 391 392
            p2p_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::directInPassCallback;
            p2p_cl_in.where |= STARPU_CPU;
        }
393
#endif
394
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
395
        if(originalCpuKernel->supportP2P(FSTARPU_CUDA_IDX)){
396 397 398 399
            p2p_cl_in.cuda_funcs[0] = StarPUCudaWrapperClass::directInPassCallback;
            p2p_cl_in.where |= STARPU_CUDA;
        }
#endif
400
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
401
        if(originalCpuKernel->supportP2P(FSTARPU_OPENCL_IDX)){
402 403 404
            p2p_cl_in.opencl_funcs[0] = StarPUOpenClWrapperClass::directInPassCallback;
            p2p_cl_in.where |= STARPU_OPENCL;
        }
405
#endif
406 407
        p2p_cl_in.nbuffers = 2;
        p2p_cl_in.modes[0] = STARPU_R;
408
        p2p_cl_in.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
409
        p2p_cl_in.name = "p2p_cl_in";
BRAMAS Berenger's avatar
BRAMAS Berenger committed
410
        memset(&p2p_cl_inout, 0, sizeof(p2p_cl_inout));
411
#ifdef STARPU_USE_CPU
412
        if(originalCpuKernel->supportP2PExtern(FSTARPU_CPU_IDX)){
413 414 415
            p2p_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::directInoutPassCallback;
            p2p_cl_inout.where |= STARPU_CPU;
        }
416
#endif
417
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
418
        if(originalCpuKernel->supportP2PExtern(FSTARPU_CUDA_IDX)){
419 420 421 422
            p2p_cl_inout.cuda_funcs[0] = StarPUCudaWrapperClass::directInoutPassCallback;
            p2p_cl_inout.where |= STARPU_CUDA;
        }
#endif
423
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
424
        if(originalCpuKernel->supportP2PExtern(FSTARPU_OPENCL_IDX)){
425 426 427
            p2p_cl_inout.opencl_funcs[0] = StarPUOpenClWrapperClass::directInoutPassCallback;
            p2p_cl_inout.where |= STARPU_OPENCL;
        }
428
#endif
429 430
        p2p_cl_inout.nbuffers = 4;
        p2p_cl_inout.modes[0] = STARPU_R;
431
        p2p_cl_inout.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
432
        p2p_cl_inout.modes[2] = STARPU_R;
433
        p2p_cl_inout.modes[3] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
434
        p2p_cl_inout.name = "p2p_cl_inout";
BRAMAS Berenger's avatar
BRAMAS Berenger committed
435 436

        memset(&m2l_cl_in, 0, sizeof(m2l_cl_in));
437
#ifdef STARPU_USE_CPU
438
        if(originalCpuKernel->supportM2L(FSTARPU_CPU_IDX)){
439 440 441
            m2l_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::transferInPassCallback;
            m2l_cl_in.where |= STARPU_CPU;
        }
442
#endif
443
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
444
        if(originalCpuKernel->supportM2L(FSTARPU_CUDA_IDX)){
445 446 447 448
            m2l_cl_in.cuda_funcs[0] = StarPUCudaWrapperClass::transferInPassCallback;
            m2l_cl_in.where |= STARPU_CUDA;
        }
#endif
449
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
450
        if(originalCpuKernel->supportM2L(FSTARPU_OPENCL_IDX)){
451 452 453
            m2l_cl_in.opencl_funcs[0] = StarPUOpenClWrapperClass::transferInPassCallback;
            m2l_cl_in.where |= STARPU_OPENCL;
        }
454
#endif
455 456
        m2l_cl_in.nbuffers = 3;
        m2l_cl_in.modes[0] = STARPU_R;
457
        m2l_cl_in.modes[1] = STARPU_R;
458
        m2l_cl_in.modes[2] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
459
        m2l_cl_in.name = "m2l_cl_in";
BRAMAS Berenger's avatar
BRAMAS Berenger committed
460
        memset(&m2l_cl_inout, 0, sizeof(m2l_cl_inout));
461
#ifdef STARPU_USE_CPU
462
        if(originalCpuKernel->supportM2LExtern(FSTARPU_CPU_IDX)){
463 464 465
            m2l_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::transferInoutPassCallback;
            m2l_cl_inout.where |= STARPU_CPU;
        }
466
#endif
467
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
468
        if(originalCpuKernel->supportM2LExtern(FSTARPU_CUDA_IDX)){
469 470 471 472
            m2l_cl_inout.cuda_funcs[0] = StarPUCudaWrapperClass::transferInoutPassCallback;
            m2l_cl_inout.where |= STARPU_CUDA;
        }
#endif
473
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
474
        if(originalCpuKernel->supportM2LExtern(FSTARPU_OPENCL_IDX)){
475 476 477
            m2l_cl_inout.opencl_funcs[0] = StarPUOpenClWrapperClass::transferInoutPassCallback;
            m2l_cl_inout.where |= STARPU_OPENCL;
        }
478
#endif
479 480 481
        m2l_cl_inout.nbuffers = 6;
        m2l_cl_inout.modes[0] = STARPU_R;
        m2l_cl_inout.modes[1] = STARPU_R;
482
        m2l_cl_inout.modes[2] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
483
        m2l_cl_inout.modes[3] = STARPU_R;
484
        m2l_cl_inout.modes[4] = STARPU_R;
485
        m2l_cl_inout.modes[5] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
486
        m2l_cl_inout.name = "m2l_cl_inout";
BRAMAS Berenger's avatar
BRAMAS Berenger committed
487 488 489 490 491
    }

    /** dealloc in a starpu way all the defined handles */
    void cleanHandle(){
        for(int idxLevel = 0 ; idxLevel < tree->getHeight() ; ++idxLevel){
492 493 494 495
            for(int idxHandle = 0 ; idxHandle < int(cellHandles[idxLevel].size()) ; ++idxHandle){
                starpu_data_unregister(cellHandles[idxLevel][idxHandle].symb);
                starpu_data_unregister(cellHandles[idxLevel][idxHandle].up);
                starpu_data_unregister(cellHandles[idxLevel][idxHandle].down);
496
            }
497
            cellHandles[idxLevel].clear();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
498 499
        }
        {
500 501 502
            for(int idxHandle = 0 ; idxHandle < int(particleHandles.size()) ; ++idxHandle){
                starpu_data_unregister(particleHandles[idxHandle].symb);
                starpu_data_unregister(particleHandles[idxHandle].down);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
503
            }
504
            particleHandles.clear();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
505 506 507 508 509 510 511 512 513 514
        }
    }

    /** Reset the handles array and create new ones to define
     * in a starpu way each block of data
     */
    void buildHandles(){
        cleanHandle();

        for(int idxLevel = 2 ; idxLevel < tree->getHeight() ; ++idxLevel){
515
            cellHandles[idxLevel].resize(tree->getNbCellGroupAtLevel(idxLevel));
BRAMAS Berenger's avatar
BRAMAS Berenger committed
516 517 518

            for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
                const CellContainerClass* currentCells = tree->getCellGroup(idxLevel, idxGroup);
519
                starpu_variable_data_register(&cellHandles[idxLevel][idxGroup].symb, 0,
520
                                              (uintptr_t)currentCells->getRawBuffer(), currentCells->getBufferSizeInByte());
521 522 523 524
                starpu_variable_data_register(&cellHandles[idxLevel][idxGroup].up, 0,
                                              (uintptr_t)currentCells->getRawMultipoleBuffer(), currentCells->getMultipoleBufferSizeInByte());
                starpu_variable_data_register(&cellHandles[idxLevel][idxGroup].down, 0,
                                              (uintptr_t)currentCells->getRawLocalBuffer(), currentCells->getLocalBufferSizeInByte());
525
                cellHandles[idxLevel][idxGroup].intervalSize = int(currentCells->getNumberOfCellsInBlock());
526 527 528 529
#ifdef STARPU_SUPPORT_ARBITER
                starpu_data_assign_arbiter(cellHandles[idxLevel][idxGroup].up, arbiterPole);
                starpu_data_assign_arbiter(cellHandles[idxLevel][idxGroup].down, arbiterLocal);
#endif
BRAMAS Berenger's avatar
BRAMAS Berenger committed
530 531 532
            }
        }
        {
533
            particleHandles.resize(tree->getNbParticleGroup());
BRAMAS Berenger's avatar
BRAMAS Berenger committed
534 535
            for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
                ParticleGroupClass* containers = tree->getParticleGroup(idxGroup);
536
                starpu_variable_data_register(&particleHandles[idxGroup].symb, 0,
537
                                              (uintptr_t)containers->getRawBuffer(), containers->getBufferSizeInByte());
538 539
                starpu_variable_data_register(&particleHandles[idxGroup].down, 0,
                                              (uintptr_t)containers->getRawAttributesBuffer(), containers->getAttributesBufferSizeInByte());
540
                particleHandles[idxGroup].intervalSize = int(containers->getNumberOfLeavesInBlock());
541 542 543
#ifdef STARPU_SUPPORT_ARBITER
                starpu_data_assign_arbiter(particleHandles[idxGroup].down, arbiterParticles);
#endif
BRAMAS Berenger's avatar
BRAMAS Berenger committed
544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572
            }
        }
    }

    /**
     * This function is creating the interactions vector between blocks.
     * It fills externalInteractionsAllLevel and externalInteractionsLeafLevel.
     * Warning, the omp task for now are using the class attributes!
     *
     */
    void buildExternalInteractionVecs(){
        FLOG( FTic timer; FTic leafTimer; FTic cellTimer; );
        // Reset interactions
        externalInteractionsAllLevel.clear();
        externalInteractionsLeafLevel.clear();
        // One per level + leaf level
        externalInteractionsAllLevel.resize(tree->getHeight());

        // First leaf level
        {
            // We create one big vector per block
            externalInteractionsLeafLevel.resize(tree->getNbParticleGroup());

            for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
                // Create the vector
                ParticleGroupClass* containers = tree->getParticleGroup(idxGroup);

                std::vector<BlockInteractions<ParticleGroupClass>>* externalInteractions = &externalInteractionsLeafLevel[idxGroup];

573
#pragma omp task default(none) firstprivate(idxGroup, containers, externalInteractions)
BRAMAS Berenger's avatar
BRAMAS Berenger committed
574 575 576 577 578
                { // Can be a task(inout:iterCells)
                    std::vector<OutOfBlockInteraction> outsideInteractions;
                    const MortonIndex blockStartIdx = containers->getStartingIndex();
                    const MortonIndex blockEndIdx   = containers->getEndingIndex();

579 580 581
                    for(int leafIdx = 0 ; leafIdx < containers->getNumberOfLeavesInBlock() ; ++leafIdx){
                        const MortonIndex mindex = containers->getLeafMortonIndex(leafIdx);
                        // ParticleContainerClass particles = containers->template getLeaf<ParticleContainerClass>(leafIdx);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
582

583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598
                        MortonIndex interactionsIndexes[26];
                        int interactionsPosition[26];
                        FTreeCoordinate coord(mindex, tree->getHeight()-1);
                        int counter = coord.getNeighborsIndexes(tree->getHeight(),interactionsIndexes,interactionsPosition);

                        for(int idxInter = 0 ; idxInter < counter ; ++idxInter){
                            if( blockStartIdx <= interactionsIndexes[idxInter] && interactionsIndexes[idxInter] < blockEndIdx ){
                                // Inside block interaction, do nothing
                            }
                            else if(interactionsIndexes[idxInter] < mindex){
                                OutOfBlockInteraction property;
                                property.insideIndex = mindex;
                                property.outIndex    = interactionsIndexes[idxInter];
                                property.outPosition = interactionsPosition[idxInter];
                                property.insideIdxInBlock = leafIdx;
                                outsideInteractions.push_back(property);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
599 600 601 602 603 604 605 606 607 608
                            }
                        }
                    }

                    // Sort to match external order
                    FQuickSort<OutOfBlockInteraction, int>::QsSequential(outsideInteractions.data(),int(outsideInteractions.size()));

                    int currentOutInteraction = 0;
                    for(int idxLeftGroup = 0 ; idxLeftGroup < idxGroup && currentOutInteraction < int(outsideInteractions.size()) ; ++idxLeftGroup){
                        ParticleGroupClass* leftContainers = tree->getParticleGroup(idxLeftGroup);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
609 610
                        const MortonIndex blockStartIdxOther    = leftContainers->getStartingIndex();
                        const MortonIndex blockEndIdxOther      = leftContainers->getEndingIndex();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
611

BRAMAS Berenger's avatar
BRAMAS Berenger committed
612
                        while(currentOutInteraction < int(outsideInteractions.size()) && outsideInteractions[currentOutInteraction].outIndex < blockStartIdxOther){
BRAMAS Berenger's avatar
BRAMAS Berenger committed
613 614 615 616
                            currentOutInteraction += 1;
                        }

                        int lastOutInteraction = currentOutInteraction;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
617
                        while(lastOutInteraction < int(outsideInteractions.size()) && outsideInteractions[lastOutInteraction].outIndex < blockEndIdxOther){
BRAMAS Berenger's avatar
BRAMAS Berenger committed
618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644
                            lastOutInteraction += 1;
                        }

                        const int nbInteractionsBetweenBlocks = (lastOutInteraction-currentOutInteraction);
                        if(nbInteractionsBetweenBlocks){
                            externalInteractions->emplace_back();
                            BlockInteractions<ParticleGroupClass>* interactions = &externalInteractions->back();
                            interactions->otherBlock = leftContainers;
                            interactions->otherBlockId = idxLeftGroup;
                            interactions->interactions.resize(nbInteractionsBetweenBlocks);
                            std::copy(outsideInteractions.begin() + currentOutInteraction,
                                      outsideInteractions.begin() + lastOutInteraction,
                                      interactions->interactions.begin());
                        }

                        currentOutInteraction = lastOutInteraction;
                    }
                }
            }
        }
        FLOG( leafTimer.tac(); );
        FLOG( cellTimer.tic(); );
        {
            for(int idxLevel = tree->getHeight()-1 ; idxLevel >= 2 ; --idxLevel){
                externalInteractionsAllLevel[idxLevel].resize(tree->getNbCellGroupAtLevel(idxLevel));

                for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
645
                    CellContainerClass* currentCells = tree->getCellGroup(idxLevel, idxGroup);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
646 647 648

                    std::vector<BlockInteractions<CellContainerClass>>* externalInteractions = &externalInteractionsAllLevel[idxLevel][idxGroup];

649
#pragma omp task default(none) firstprivate(idxGroup, currentCells, idxLevel, externalInteractions)
BRAMAS Berenger's avatar
BRAMAS Berenger committed
650 651 652 653 654
                    {
                        std::vector<OutOfBlockInteraction> outsideInteractions;
                        const MortonIndex blockStartIdx = currentCells->getStartingIndex();
                        const MortonIndex blockEndIdx   = currentCells->getEndingIndex();

655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673
                        for(int cellIdx = 0 ; cellIdx < currentCells->getNumberOfCellsInBlock() ; ++cellIdx){
                            const MortonIndex mindex = currentCells->getCellMortonIndex(cellIdx);

                            MortonIndex interactionsIndexes[189];
                            int interactionsPosition[189];
                            const FTreeCoordinate coord(mindex, idxLevel);
                            int counter = coord.getInteractionNeighbors(idxLevel,interactionsIndexes,interactionsPosition);

                            for(int idxInter = 0 ; idxInter < counter ; ++idxInter){
                                if( blockStartIdx <= interactionsIndexes[idxInter] && interactionsIndexes[idxInter] < blockEndIdx ){
                                    // Nothing to do
                                }
                                else if(interactionsIndexes[idxInter] < mindex){
                                    OutOfBlockInteraction property;
                                    property.insideIndex = mindex;
                                    property.outIndex    = interactionsIndexes[idxInter];
                                    property.outPosition = interactionsPosition[idxInter];
                                    property.insideIdxInBlock = cellIdx;
                                    outsideInteractions.push_back(property);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
674 675 676 677 678 679 680 681 682 683
                                }
                            }
                        }

                        // Manage outofblock interaction
                        FQuickSort<OutOfBlockInteraction, int>::QsSequential(outsideInteractions.data(),int(outsideInteractions.size()));

                        int currentOutInteraction = 0;
                        for(int idxLeftGroup = 0 ; idxLeftGroup < idxGroup && currentOutInteraction < int(outsideInteractions.size()) ; ++idxLeftGroup){
                            CellContainerClass* leftCells   = tree->getCellGroup(idxLevel, idxLeftGroup);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
684 685
                            const MortonIndex blockStartIdxOther = leftCells->getStartingIndex();
                            const MortonIndex blockEndIdxOther   = leftCells->getEndingIndex();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
686

BRAMAS Berenger's avatar
BRAMAS Berenger committed
687
                            while(currentOutInteraction < int(outsideInteractions.size()) && outsideInteractions[currentOutInteraction].outIndex < blockStartIdxOther){
BRAMAS Berenger's avatar
BRAMAS Berenger committed
688 689 690 691
                                currentOutInteraction += 1;
                            }

                            int lastOutInteraction = currentOutInteraction;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
692
                            while(lastOutInteraction < int(outsideInteractions.size()) && outsideInteractions[lastOutInteraction].outIndex < blockEndIdxOther){
BRAMAS Berenger's avatar
BRAMAS Berenger committed
693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716
                                lastOutInteraction += 1;
                            }

                            // Create interactions
                            const int nbInteractionsBetweenBlocks = (lastOutInteraction-currentOutInteraction);
                            if(nbInteractionsBetweenBlocks){
                                externalInteractions->emplace_back();
                                BlockInteractions<CellContainerClass>* interactions = &externalInteractions->back();
                                interactions->otherBlock = leftCells;
                                interactions->otherBlockId = idxLeftGroup;
                                interactions->interactions.resize(nbInteractionsBetweenBlocks);
                                std::copy(outsideInteractions.begin() + currentOutInteraction,
                                          outsideInteractions.begin() + lastOutInteraction,
                                          interactions->interactions.begin());
                            }

                            currentOutInteraction = lastOutInteraction;
                        }
                    }
                }
            }
        }
        FLOG( cellTimer.tac(); );

717
#pragma omp taskwait
BRAMAS Berenger's avatar
BRAMAS Berenger committed
718 719 720 721 722 723 724 725 726 727 728 729 730 731 732

        FLOG( FLog::Controller << "\t\t Prepare in " << timer.tacAndElapsed() << "s\n" );
        FLOG( FLog::Controller << "\t\t\t Prepare at leaf level in   " << leafTimer.elapsed() << "s\n" );
        FLOG( FLog::Controller << "\t\t\t Prepare at other levels in " << cellTimer.elapsed() << "s\n" );
    }

    /////////////////////////////////////////////////////////////////////////////////////
    /// Bottom Pass
    /////////////////////////////////////////////////////////////////////////////////////

    void bottomPass(){
        FLOG( FTic timer; );

        for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
            starpu_insert_task(&p2m_cl,
733 734
                               STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
                               STARPU_VALUE, &cellHandles[tree->getHeight()-1][idxGroup].intervalSize, sizeof(int),
735
                    STARPU_PRIORITY, FStarPUFmmPriorities::Controller().getPrioP2M(),
736 737 738
                    STARPU_R, cellHandles[tree->getHeight()-1][idxGroup].symb,
                    STARPU_RW, cellHandles[tree->getHeight()-1][idxGroup].up,
                    STARPU_R, particleHandles[idxGroup].symb,
BRAMAS Berenger's avatar
BRAMAS Berenger committed
739 740 741 742 743 744 745 746 747 748 749 750
                    0);
        }

        FLOG( FLog::Controller << "\t\t bottomPass in " << timer.tacAndElapsed() << "s\n" );
    }

    /////////////////////////////////////////////////////////////////////////////////////
    /// Upward Pass
    /////////////////////////////////////////////////////////////////////////////////////

    void upwardPass(){
        FLOG( FTic timer; );
751
        for(int idxLevel = FMath::Min(tree->getHeight() - 2, FAbstractAlgorithm::lowerWorkingLevel - 1) ; idxLevel >= FAbstractAlgorithm::upperWorkingLevel ; --idxLevel){
BRAMAS Berenger's avatar
BRAMAS Berenger committed
752 753 754 755 756 757
            int idxSubGroup = 0;

            for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
                CellContainerClass*const currentCells = tree->getCellGroup(idxLevel, idxGroup);

                struct starpu_task* const task = starpu_task_create();
758 759 760
                task->dyn_handles = (starpu_data_handle_t*)malloc(sizeof(starpu_data_handle_t)*20);
                task->dyn_handles[0] = cellHandles[idxLevel][idxGroup].symb;
                task->dyn_handles[1] = cellHandles[idxLevel][idxGroup].up;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
761 762 763 764 765 766 767

                // Skip current group if needed
                if( tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex() <= (currentCells->getStartingIndex()<<3) ){
                    ++idxSubGroup;
                    FAssertLF( idxSubGroup != tree->getNbCellGroupAtLevel(idxLevel+1) );
                    FAssertLF( (tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex()>>3) == currentCells->getStartingIndex() );
                }
BRAMAS Berenger's avatar
BRAMAS Berenger committed
768

BRAMAS Berenger's avatar
BRAMAS Berenger committed
769 770
                // Copy at max 8 groups
                int nbSubCellGroups = 0;
771 772
                task->dyn_handles[(nbSubCellGroups*2) + 2] = cellHandles[idxLevel+1][idxSubGroup].symb;
                task->dyn_handles[(nbSubCellGroups*2) + 3] = cellHandles[idxLevel+1][idxSubGroup].up;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
773
                nbSubCellGroups += 1;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
774 775

                while(tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex() <= (((currentCells->getEndingIndex()-1)<<3)+7)
776
                      && (idxSubGroup+1) != tree->getNbCellGroupAtLevel(idxLevel+1)
BRAMAS Berenger's avatar
BRAMAS Berenger committed
777
                      && tree->getCellGroup(idxLevel+1, idxSubGroup+1)->getStartingIndex() <= ((currentCells->getEndingIndex()-1)<<3)+7 ){
778
                    idxSubGroup += 1;
779 780
                    task->dyn_handles[(nbSubCellGroups*2) + 2] = cellHandles[idxLevel+1][idxSubGroup].symb;
                    task->dyn_handles[(nbSubCellGroups*2) + 3] = cellHandles[idxLevel+1][idxSubGroup].up;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
781 782 783 784 785 786 787 788 789 790
                    nbSubCellGroups += 1;
                    FAssertLF( nbSubCellGroups <= 9 );
                }

                // put the right codelet
                task->cl = &m2m_cl[nbSubCellGroups-1];
                // put args values
                char *arg_buffer;
                size_t arg_buffer_size;
                starpu_codelet_pack_args((void**)&arg_buffer, &arg_buffer_size,
791
                                         STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
BRAMAS Berenger's avatar
BRAMAS Berenger committed
792 793
                                         STARPU_VALUE, &nbSubCellGroups, sizeof(nbSubCellGroups),
                                         STARPU_VALUE, &idxLevel, sizeof(idxLevel),
794
                                         STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
BRAMAS Berenger's avatar
BRAMAS Berenger committed
795 796 797
                                         0);
                task->cl_arg = arg_buffer;
                task->cl_arg_size = arg_buffer_size;
798
                task->priority = FStarPUFmmPriorities::Controller().getPrioM2M(idxLevel);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
799 800 801 802 803 804 805 806 807 808 809 810 811
                FAssertLF(starpu_task_submit(task) == 0);
            }
        }
        FLOG( FLog::Controller << "\t\t upwardPass in " << timer.tacAndElapsed() << "s\n" );
    }

    /////////////////////////////////////////////////////////////////////////////////////
    /// Transfer Pass
    /////////////////////////////////////////////////////////////////////////////////////

    void transferPass(){
        FLOG( FTic timer; );
        FLOG( FTic timerInBlock; FTic timerOutBlock; );
812
        for(int idxLevel = FAbstractAlgorithm::lowerWorkingLevel-1 ; idxLevel >= FAbstractAlgorithm::upperWorkingLevel ; --idxLevel){
BRAMAS Berenger's avatar
BRAMAS Berenger committed
813 814 815
            FLOG( timerInBlock.tic() );
            for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
                starpu_insert_task(&m2l_cl_in,
816 817 818
                                   STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
                                   STARPU_VALUE, &idxLevel, sizeof(idxLevel),
                                   STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
819
                                   STARPU_PRIORITY, FStarPUFmmPriorities::Controller().getPrioM2L(idxLevel),
820 821
                                   STARPU_R, cellHandles[idxLevel][idxGroup].symb,
                                   STARPU_R, cellHandles[idxLevel][idxGroup].up,
822
                                   (STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][idxGroup].down,
823
                                   0);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
824 825 826 827 828 829 830 831 832 833
            }
            FLOG( timerInBlock.tac() );

            FLOG( timerOutBlock.tic() );
            for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
                for(int idxInteraction = 0; idxInteraction < int(externalInteractionsAllLevel[idxLevel][idxGroup].size()) ; ++idxInteraction){
                    const int interactionid = externalInteractionsAllLevel[idxLevel][idxGroup][idxInteraction].otherBlockId;
                    const std::vector<OutOfBlockInteraction>* outsideInteractions = &externalInteractionsAllLevel[idxLevel][idxGroup][idxInteraction].interactions;

                    starpu_insert_task(&m2l_cl_inout,
834 835 836 837
                                       STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
                                       STARPU_VALUE, &idxLevel, sizeof(idxLevel),
                                       STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions),
                                       STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
838
                                       STARPU_PRIORITY, FStarPUFmmPriorities::Controller().getPrioM2LExtern(idxLevel),
839 840 841 842 843 844 845
                                       STARPU_R, cellHandles[idxLevel][idxGroup].symb,
                                       STARPU_R, cellHandles[idxLevel][idxGroup].up,
                                       (STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][idxGroup].down,
                                       STARPU_R, cellHandles[idxLevel][interactionid].symb,
                                       STARPU_R, cellHandles[idxLevel][interactionid].up,
                                       (STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][interactionid].down,
                                       0);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
846 847 848 849 850 851 852 853 854 855 856 857 858 859 860
                }
            }
            FLOG( timerOutBlock.tac() );
        }
        FLOG( FLog::Controller << "\t\t transferPass in " << timer.tacAndElapsed() << "s\n" );
        FLOG( FLog::Controller << "\t\t\t inblock in  " << timerInBlock.elapsed() << "s\n" );
        FLOG( FLog::Controller << "\t\t\t outblock in " << timerOutBlock.elapsed() << "s\n" );
    }

    /////////////////////////////////////////////////////////////////////////////////////
    /// Downard Pass
    /////////////////////////////////////////////////////////////////////////////////////

    void downardPass(){
        FLOG( FTic timer; );
861
        for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel - 1 ; ++idxLevel){
BRAMAS Berenger's avatar
BRAMAS Berenger committed
862 863 864 865 866 867
            int idxSubGroup = 0;

            for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
                CellContainerClass*const currentCells = tree->getCellGroup(idxLevel, idxGroup);

                struct starpu_task* const task = starpu_task_create();
868 869 870
                task->dyn_handles = (starpu_data_handle_t*)malloc(sizeof(starpu_data_handle_t)*20);
                task->dyn_handles[0] = cellHandles[idxLevel][idxGroup].symb;
                task->dyn_handles[1] = cellHandles[idxLevel][idxGroup].down;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
871 872 873 874 875 876 877 878 879

                // Skip current group if needed
                if( tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex() <= (currentCells->getStartingIndex()<<3) ){
                    ++idxSubGroup;
                    FAssertLF( idxSubGroup != tree->getNbCellGroupAtLevel(idxLevel+1) );
                    FAssertLF( (tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex()>>3) == currentCells->getStartingIndex() );
                }
                // Copy at max 8 groups
                int nbSubCellGroups = 0;
880 881
                task->dyn_handles[(nbSubCellGroups*2) + 2] = cellHandles[idxLevel+1][idxSubGroup].symb;
                task->dyn_handles[(nbSubCellGroups*2) + 3] = cellHandles[idxLevel+1][idxSubGroup].down;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
882
                nbSubCellGroups += 1;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
883
                while(tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex() <= (((currentCells->getEndingIndex()-1)<<3)+7)
884
                      && (idxSubGroup+1) != tree->getNbCellGroupAtLevel(idxLevel+1)
BRAMAS Berenger's avatar
BRAMAS Berenger committed
885
                      && tree->getCellGroup(idxLevel+1, idxSubGroup+1)->getStartingIndex() <= ((currentCells->getEndingIndex()-1)<<3)+7 ){
886
                    idxSubGroup += 1;
887 888
                    task->dyn_handles[(nbSubCellGroups*2) + 2] = cellHandles[idxLevel+1][idxSubGroup].symb;
                    task->dyn_handles[(nbSubCellGroups*2) + 3] = cellHandles[idxLevel+1][idxSubGroup].down;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
889 890 891 892 893 894 895 896 897 898
                    nbSubCellGroups += 1;
                    FAssertLF( nbSubCellGroups <= 9 );
                }

                // put the right codelet
                task->cl = &l2l_cl[nbSubCellGroups-1];
                // put args values
                char *arg_buffer;
                size_t arg_buffer_size;
                starpu_codelet_pack_args((void**)&arg_buffer, &arg_buffer_size,
899
                                         STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
BRAMAS Berenger's avatar
BRAMAS Berenger committed
900 901
                                         STARPU_VALUE, &nbSubCellGroups, sizeof(nbSubCellGroups),
                                         STARPU_VALUE, &idxLevel, sizeof(idxLevel),
902
                                         STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
BRAMAS Berenger's avatar
BRAMAS Berenger committed
903 904 905
                                         0);
                task->cl_arg = arg_buffer;
                task->cl_arg_size = arg_buffer_size;
906
                task->priority = FStarPUFmmPriorities::Controller().getPrioL2L(idxLevel);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923
                FAssertLF(starpu_task_submit(task) == 0);
            }
        }
        FLOG( FLog::Controller << "\t\t downardPass in " << timer.tacAndElapsed() << "s\n" );
    }

    /////////////////////////////////////////////////////////////////////////////////////
    /// Direct Pass
    /////////////////////////////////////////////////////////////////////////////////////

    void directPass(){
        FLOG( FTic timer; );
        FLOG( FTic timerInBlock; FTic timerOutBlock; );

        FLOG( timerInBlock.tic() );
        for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
            starpu_insert_task(&p2p_cl_in,
924 925
                               STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
                               STARPU_VALUE, &particleHandles[idxGroup].intervalSize, sizeof(int),
926
                               STARPU_PRIORITY, FStarPUFmmPriorities::Controller().getPrioP2P(),
927
                               STARPU_R, particleHandles[idxGroup].symb,
928
                               (STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED), particleHandles[idxGroup].down,
929
                               0);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
930 931 932 933
        }
        FLOG( timerInBlock.tac() );
        FLOG( timerOutBlock.tic() );
        for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
934
            for(int idxInteraction = 0; idxInteraction < int(externalInteractionsLeafLevel[idxGroup].size()) ; ++idxInteraction){
BRAMAS Berenger's avatar
BRAMAS Berenger committed
935 936
                const int interactionid = externalInteractionsLeafLevel[idxGroup][idxInteraction].otherBlockId;
                const std::vector<OutOfBlockInteraction>* outsideInteractions = &externalInteractionsLeafLevel[idxGroup][idxInteraction].interactions;
937
                starpu_insert_task(&p2p_cl_inout,
938 939 940
                                   STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
                                   STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions),
                                   STARPU_VALUE, &particleHandles[idxGroup].intervalSize, sizeof(int),
941
                                   STARPU_PRIORITY, FStarPUFmmPriorities::Controller().getPrioP2PExtern(),
942
                                   STARPU_R, particleHandles[idxGroup].symb,
943
                                   (STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED), particleHandles[idxGroup].down,
944
                                   STARPU_R, particleHandles[interactionid].symb,
945
                                   (STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED), particleHandles[interactionid].down,
946
                                   0);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963
            }
        }
        FLOG( timerOutBlock.tac() );

        FLOG( FLog::Controller << "\t\t directPass in " << timer.tacAndElapsed() << "s\n" );
        FLOG( FLog::Controller << "\t\t\t inblock  in " << timerInBlock.elapsed() << "s\n" );
        FLOG( FLog::Controller << "\t\t\t outblock in " << timerOutBlock.elapsed() << "s\n" );
    }
    /////////////////////////////////////////////////////////////////////////////////////
    /// Merge Pass
    /////////////////////////////////////////////////////////////////////////////////////

    void mergePass(){
        FLOG( FTic timer; );

        for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
            starpu_insert_task(&l2p_cl,
964 965 966
                               STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
                               STARPU_VALUE, &cellHandles[tree->getHeight()-1][idxGroup].intervalSize, sizeof(int),
                    STARPU_PRIORITY, FStarPUFmmPriorities::Controller().getPrioL2P(),
967 968 969
                    STARPU_R, cellHandles[tree->getHeight()-1][idxGroup].symb,
                    STARPU_R, cellHandles[tree->getHeight()-1][idxGroup].down,
                    STARPU_R, particleHandles[idxGroup].symb,
970
                    (STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED), particleHandles[idxGroup].down,
BRAMAS Berenger's avatar
BRAMAS Berenger committed
971 972 973 974 975 976 977 978
                    0);
        }

        FLOG( FLog::Controller << "\t\t L2P in " << timer.tacAndElapsed() << "s\n" );
    }
};

#endif // FGROUPTASKSTARPUALGORITHM_HPP