FGroupTaskStarpuAlgorithm.hpp 54 KB
Newer Older
1 2
// Keep in private GIT
// @SCALFMM_PRIVATE
BRAMAS Berenger's avatar
BRAMAS Berenger committed
3 4 5
#ifndef FGROUPTASKSTARPUALGORITHM_HPP
#define FGROUPTASKSTARPUALGORITHM_HPP

6 7 8 9 10 11 12
#include "../../Utils/FGlobal.hpp"
#include "../../Core/FCoreCommon.hpp"
#include "../../Utils/FQuickSort.hpp"
#include "../../Containers/FTreeCoordinate.hpp"
#include "../../Utils/FLog.hpp"
#include "../../Utils/FTic.hpp"
#include "../../Utils/FAssert.hpp"
BRAMAS Berenger's avatar
BRAMAS Berenger committed
13

14 15
#include "FOutOfBlockInteraction.hpp"

BRAMAS Berenger's avatar
BRAMAS Berenger committed
16
#include <vector>
17
#include <memory>
BRAMAS Berenger's avatar
BRAMAS Berenger committed
18 19 20 21

#include <omp.h>

#include <starpu.h>
22
#include "../StarPUUtils/FStarPUUtils.hpp"
23
#include "../StarPUUtils/FStarPUFmmPriorities.hpp"
BRAMAS Berenger's avatar
BRAMAS Berenger committed
24

25
#ifdef STARPU_USE_CPU
26
#include "../StarPUUtils/FStarPUCpuWrapper.hpp"
27
#endif
28
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
29 30 31 32 33
#include "../StarPUUtils/FStarPUCudaWrapper.hpp"
#include "../Cuda/FCudaEmptyKernel.hpp"
#include "../Cuda/FCudaGroupAttachedLeaf.hpp"
#include "../Cuda/FCudaGroupOfParticles.hpp"
#include "../Cuda/FCudaGroupOfCells.hpp"
34
#include "../Cuda/FCudaEmptyCellSymb.hpp"
35
#endif
36
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
37 38
#include "../StarPUUtils/FStarPUOpenClWrapper.hpp"
#include "../OpenCl/FOpenCLDeviceWrapper.hpp"
39 40
#endif

41

BRAMAS Berenger's avatar
BRAMAS Berenger committed
42 43
#include "Containers/FBoolArray.hpp"

44
template <class OctreeClass, class CellContainerClass, class KernelClass, class ParticleGroupClass, class StarPUCpuWrapperClass
45 46 47 48 49 50 51
          #ifdef SCALFMM_ENABLE_CUDA_KERNEL
          , class StarPUCudaWrapperClass = FStarPUCudaWrapper<KernelClass, FCudaEmptyCellSymb, int, int, FCudaGroupOfCells<FCudaEmptyCellSymb, int, int>,
                                                              FCudaGroupOfParticles<int, 0, 0, int>, FCudaGroupAttachedLeaf<int, 0, 0, int>, FCudaEmptyKernel<int> >
          #endif
          #ifdef SCALFMM_ENABLE_OPENCL_KERNEL
          , class StarPUOpenClWrapperClass = FStarPUOpenClWrapper<KernelClass, FOpenCLDeviceWrapper<KernelClass>>
          #endif
52
          >
53
class FGroupTaskStarPUAlgorithm : public FAbstractAlgorithm {
BRAMAS Berenger's avatar
BRAMAS Berenger committed
54
protected:
55
    typedef FGroupTaskStarPUAlgorithm<OctreeClass, CellContainerClass, KernelClass, ParticleGroupClass, StarPUCpuWrapperClass
56
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
57
    , StarPUCudaWrapperClass
58
#endif
59
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
60
    , StarPUOpenClWrapperClass
61 62
#endif
    > ThisClass;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
63 64 65 66 67 68 69 70

    template <class OtherBlockClass>
    struct BlockInteractions{
        OtherBlockClass* otherBlock;
        int otherBlockId;
        std::vector<OutOfBlockInteraction> interactions;
    };

71 72 73 74
    struct CellHandles{
        starpu_data_handle_t symb;
        starpu_data_handle_t up;
        starpu_data_handle_t down;
75
        int intervalSize;
76 77 78 79 80
    };

    struct ParticleHandles{
        starpu_data_handle_t symb;
        starpu_data_handle_t down;
81
        int intervalSize;
82 83
    };

BRAMAS Berenger's avatar
BRAMAS Berenger committed
84 85 86 87
    std::vector< std::vector< std::vector<BlockInteractions<CellContainerClass>>>> externalInteractionsAllLevel;
    std::vector< std::vector<BlockInteractions<ParticleGroupClass>>> externalInteractionsLeafLevel;

    OctreeClass*const tree;       //< The Tree
88
    KernelClass*const originalCpuKernel;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
89

90 91
    std::vector<CellHandles>* cellHandles;
    std::vector<ParticleHandles> particleHandles;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
92 93

    starpu_codelet p2m_cl;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
94 95
    starpu_codelet m2m_cl;
    starpu_codelet l2l_cl;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
96 97 98 99 100 101 102 103
    starpu_codelet l2p_cl;

    starpu_codelet m2l_cl_in;
    starpu_codelet m2l_cl_inout;

    starpu_codelet p2p_cl_in;
    starpu_codelet p2p_cl_inout;

BRAMAS Berenger's avatar
BRAMAS Berenger committed
104
#ifdef STARPU_USE_CPU
105
    StarPUCpuWrapperClass cpuWrapper;
106
#endif
107
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
108 109
    StarPUCudaWrapperClass cudaWrapper;
#endif
110
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
111 112
    StarPUOpenClWrapperClass openclWrapper;
#endif
113 114 115

    FStarPUPtrInterface wrappers;
    FStarPUPtrInterface* wrapperptr;
116

117
#ifdef STARPU_SUPPORT_ARBITER
BRAMAS Berenger's avatar
BRAMAS Berenger committed
118
    starpu_arbiter_t arbiterGlobal;
119
#endif
120 121 122 123 124 125 126 127 128 129 130 131

#ifdef STARPU_USE_TASK_NAME
    std::vector<std::unique_ptr<char[]>> m2mTaskNames;
    std::vector<std::unique_ptr<char[]>> m2lTaskNames;
    std::vector<std::unique_ptr<char[]>> m2lOuterTaskNames;
    std::vector<std::unique_ptr<char[]>> l2lTaskNames;
    std::unique_ptr<char[]> p2mTaskNames;
    std::unique_ptr<char[]> l2pTaskNames;
    std::unique_ptr<char[]> p2pTaskNames;
    std::unique_ptr<char[]> p2pOuterTaskNames;
#endif

BRAMAS Berenger's avatar
BRAMAS Berenger committed
132
public:
133 134
    FGroupTaskStarPUAlgorithm(OctreeClass*const inTree, KernelClass* inKernels)
        : tree(inTree), originalCpuKernel(inKernels),
135
          cellHandles(nullptr),
136 137 138 139 140 141 142 143 144 145
      #ifdef STARPU_USE_CPU
          cpuWrapper(tree->getHeight()),
      #endif
      #ifdef SCALFMM_ENABLE_CUDA_KERNEL
          cudaWrapper(tree->getHeight()),
      #endif
      #ifdef SCALFMM_ENABLE_OPENCL_KERNEL
          openclWrapper(tree->getHeight()),
      #endif
          wrapperptr(&wrappers){
BRAMAS Berenger's avatar
BRAMAS Berenger committed
146 147 148
        FAssertLF(tree, "tree cannot be null");
        FAssertLF(inKernels, "kernels cannot be null");

149 150
        FAbstractAlgorithm::setNbLevelsInTree(tree->getHeight());

151 152
        struct starpu_conf conf;
        FAssertLF(starpu_conf_init(&conf) == 0);
153
        FStarPUFmmPriorities::Controller().init(&conf, tree->getHeight(), inKernels);
154
        FAssertLF(starpu_init(&conf) == 0);
155 156 157

        starpu_pthread_mutex_t initMutex;
        starpu_pthread_mutex_init(&initMutex, NULL);
158
#ifdef STARPU_USE_CPU
159 160 161 162 163
        FStarPUUtils::ExecOnWorkers(STARPU_CPU, [&](){
            starpu_pthread_mutex_lock(&initMutex);
            cpuWrapper.initKernel(starpu_worker_get_id(), inKernels);
            starpu_pthread_mutex_unlock(&initMutex);
        });
164
        wrappers.set(FSTARPU_CPU_IDX, &cpuWrapper);
165
#endif
166
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
167 168 169 170 171 172 173
        FStarPUUtils::ExecOnWorkers(STARPU_CUDA, [&](){
            starpu_pthread_mutex_lock(&initMutex);
            cudaWrapper.initKernel(starpu_worker_get_id(), inKernels);
            starpu_pthread_mutex_unlock(&initMutex);
        });
        wrappers.set(FSTARPU_CUDA_IDX, &cudaWrapper);
#endif
174
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
175 176 177 178 179 180
        FStarPUUtils::ExecOnWorkers(STARPU_OPENCL, [&](){
            starpu_pthread_mutex_lock(&initMutex);
            openclWrapper.initKernel(starpu_worker_get_id(), inKernels);
            starpu_pthread_mutex_unlock(&initMutex);
        });
        wrappers.set(FSTARPU_OPENCL_IDX, &openclWrapper);
181
#endif
182 183
        starpu_pthread_mutex_destroy(&initMutex);

184 185
        starpu_pause();

186
        cellHandles   = new std::vector<CellHandles>[tree->getHeight()];
187

188
#ifdef STARPU_SUPPORT_ARBITER
BRAMAS Berenger's avatar
BRAMAS Berenger committed
189
        arbiterGlobal = starpu_arbiter_create();
190 191
#endif

192 193 194
        initCodelet();
        rebuildInteractions();

BRAMAS Berenger's avatar
BRAMAS Berenger committed
195
        FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max Worker " << starpu_worker_get_count() << ")\n");
BRAMAS Berenger's avatar
BRAMAS Berenger committed
196
#ifdef STARPU_USE_CPU
BRAMAS Berenger's avatar
BRAMAS Berenger committed
197
        FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max CPU " << starpu_cpu_worker_get_count() << ")\n");
BRAMAS Berenger's avatar
BRAMAS Berenger committed
198
#endif
199
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
BRAMAS Berenger's avatar
BRAMAS Berenger committed
200 201
        FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max OpenCL " << starpu_opencl_worker_get_count() << ")\n");
#endif
202
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
BRAMAS Berenger's avatar
BRAMAS Berenger committed
203 204
        FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max CUDA " << starpu_cuda_worker_get_count() << ")\n");
#endif
205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235

        buildTaskNames();
    }

    void buildTaskNames(){
#ifdef STARPU_USE_TASK_NAME
        const int namesLength = 128;
        m2mTaskNames.resize(tree->getHeight());
        m2lTaskNames.resize(tree->getHeight());
        m2lOuterTaskNames.resize(tree->getHeight());
        l2lTaskNames.resize(tree->getHeight());
        for(int idxLevel = 0 ; idxLevel < tree->getHeight() ; ++idxLevel){
            m2mTaskNames[idxLevel].reset(new char[namesLength]);
            snprintf(m2mTaskNames[idxLevel].get(), namesLength, "M2M-level-%d", idxLevel);
            m2lTaskNames[idxLevel].reset(new char[namesLength]);
            snprintf(m2lTaskNames[idxLevel].get(), namesLength, "M2L-level-%d", idxLevel);
            m2lOuterTaskNames[idxLevel].reset(new char[namesLength]);
            snprintf(m2lOuterTaskNames[idxLevel].get(), namesLength, "M2L-out-level-%d", idxLevel);
            l2lTaskNames[idxLevel].reset(new char[namesLength]);
            snprintf(l2lTaskNames[idxLevel].get(), namesLength, "L2L-level-%d", idxLevel);
        }

        p2mTaskNames.reset(new char[namesLength]);
        snprintf(p2mTaskNames.get(), namesLength, "P2M");
        l2pTaskNames.reset(new char[namesLength]);
        snprintf(l2pTaskNames.get(), namesLength, "L2P");
        p2pTaskNames.reset(new char[namesLength]);
        snprintf(p2pTaskNames.get(), namesLength, "P2P");
        p2pOuterTaskNames.reset(new char[namesLength]);
        snprintf(p2pOuterTaskNames.get(), namesLength, "P2P-out");
#endif
BRAMAS Berenger's avatar
BRAMAS Berenger committed
236 237 238
    }

    ~FGroupTaskStarPUAlgorithm(){
BRAMAS Berenger's avatar
BRAMAS Berenger committed
239 240
        starpu_resume();

BRAMAS Berenger's avatar
BRAMAS Berenger committed
241
        cleanHandle();
242
        delete[] cellHandles;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
243

BRAMAS Berenger's avatar
BRAMAS Berenger committed
244 245 246 247 248 249 250 251 252 253
        starpu_pthread_mutex_t releaseMutex;
        starpu_pthread_mutex_init(&releaseMutex, NULL);
#ifdef STARPU_USE_CPU
        FStarPUUtils::ExecOnWorkers(STARPU_CPU, [&](){
            starpu_pthread_mutex_lock(&releaseMutex);
            cpuWrapper.releaseKernel(starpu_worker_get_id());
            starpu_pthread_mutex_unlock(&releaseMutex);
        });
        wrappers.set(FSTARPU_CPU_IDX, &cpuWrapper);
#endif
254
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
BRAMAS Berenger's avatar
BRAMAS Berenger committed
255 256 257 258 259 260 261
        FStarPUUtils::ExecOnWorkers(STARPU_CUDA, [&](){
            starpu_pthread_mutex_lock(&releaseMutex);
            cudaWrapper.releaseKernel(starpu_worker_get_id());
            starpu_pthread_mutex_unlock(&releaseMutex);
        });
        wrappers.set(FSTARPU_CUDA_IDX, &cudaWrapper);
#endif
262
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
BRAMAS Berenger's avatar
BRAMAS Berenger committed
263 264 265 266 267 268 269 270 271
        FStarPUUtils::ExecOnWorkers(STARPU_OPENCL, [&](){
            starpu_pthread_mutex_lock(&releaseMutex);
            openclWrapper.releaseKernel(starpu_worker_get_id());
            starpu_pthread_mutex_unlock(&releaseMutex);
        });
        wrappers.set(FSTARPU_OPENCL_IDX, &openclWrapper);
#endif
        starpu_pthread_mutex_destroy(&releaseMutex);

272 273

#ifdef STARPU_SUPPORT_ARBITER
BRAMAS Berenger's avatar
BRAMAS Berenger committed
274
        starpu_arbiter_destroy(arbiterGlobal);
275 276
#endif

BRAMAS Berenger's avatar
BRAMAS Berenger committed
277 278 279
        starpu_shutdown();
    }

BRAMAS Berenger's avatar
BRAMAS Berenger committed
280 281 282 283 284 285 286 287
    void rebuildInteractions(){
        #pragma omp parallel
        #pragma omp single
        buildExternalInteractionVecs();

        buildHandles();
    }

288 289 290 291 292
protected:
    /**
      * Runs the complete algorithm.
      */
    void executeCore(const unsigned operationsToProceed) override {
BRAMAS Berenger's avatar
BRAMAS Berenger committed
293
        FLOG( FLog::Controller << "\tStart FGroupTaskStarPUAlgorithm\n" );
294
        const bool directOnly = (tree->getHeight() <= 2);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
295 296 297

        starpu_resume();

298
        if(operationsToProceed & FFmmP2M && !directOnly) bottomPass();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
299

300
        if(operationsToProceed & FFmmM2M && !directOnly) upwardPass();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
301

302
        if(operationsToProceed & FFmmM2L && !directOnly) transferPass();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
303

304
        if(operationsToProceed & FFmmL2L && !directOnly) downardPass();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
305

BRAMAS Berenger's avatar
BRAMAS Berenger committed
306 307
        if( operationsToProceed & FFmmP2P ) directPass();

308
        if( operationsToProceed & FFmmL2P && !directOnly) mergePass();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
309 310 311 312 313

        starpu_task_wait_for_all();
        starpu_pause();
    }

314

BRAMAS Berenger's avatar
BRAMAS Berenger committed
315 316
    void initCodelet(){
        memset(&p2m_cl, 0, sizeof(p2m_cl));
317
#ifdef STARPU_USE_CPU
318
        if(originalCpuKernel->supportP2M(FSTARPU_CPU_IDX)){
319 320 321
            p2m_cl.cpu_funcs[0] = StarPUCpuWrapperClass::bottomPassCallback;
            p2m_cl.where |= STARPU_CPU;
        }
322
#endif
323
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
324
        if(originalCpuKernel->supportP2M(FSTARPU_CUDA_IDX)){
325 326 327 328
            p2m_cl.cuda_funcs[0] = StarPUCudaWrapperClass::bottomPassCallback;
            p2m_cl.where |= STARPU_CUDA;
        }
#endif
329
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
330
        if(originalCpuKernel->supportP2M(FSTARPU_OPENCL_IDX)){
331 332 333
            p2m_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::bottomPassCallback;
            p2m_cl.where |= STARPU_OPENCL;
        }
334
#endif
335 336 337 338
        p2m_cl.nbuffers = 3;
        p2m_cl.modes[0] = STARPU_R;
        p2m_cl.modes[1] = STARPU_RW;
        p2m_cl.modes[2] = STARPU_R;
339
        p2m_cl.name = "p2m_cl";
BRAMAS Berenger's avatar
BRAMAS Berenger committed
340

BRAMAS Berenger's avatar
BRAMAS Berenger committed
341
        memset(&m2m_cl, 0, sizeof(m2m_cl));
342
#ifdef STARPU_USE_CPU
BRAMAS Berenger's avatar
BRAMAS Berenger committed
343 344 345 346
        if(originalCpuKernel->supportM2M(FSTARPU_CPU_IDX)){
            m2m_cl.cpu_funcs[0] = StarPUCpuWrapperClass::upwardPassCallback;
            m2m_cl.where |= STARPU_CPU;
        }
347
#endif
348
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
BRAMAS Berenger's avatar
BRAMAS Berenger committed
349 350 351 352
        if(originalCpuKernel->supportM2M(FSTARPU_CUDA_IDX)){
            m2m_cl.cuda_funcs[0] = StarPUCudaWrapperClass::upwardPassCallback;
            m2m_cl.where |= STARPU_CUDA;
        }
353
#endif
354
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
BRAMAS Berenger's avatar
BRAMAS Berenger committed
355 356 357 358
        if(originalCpuKernel->supportM2M(FSTARPU_OPENCL_IDX)){
            m2m_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::upwardPassCallback;
            m2m_cl.where |= STARPU_OPENCL;
        }
359
#endif
BRAMAS Berenger's avatar
BRAMAS Berenger committed
360 361 362 363 364 365 366
        m2m_cl.nbuffers = 4;
        m2m_cl.dyn_modes = (starpu_data_access_mode*)malloc(m2m_cl.nbuffers*sizeof(starpu_data_access_mode));
        m2m_cl.dyn_modes[0] = STARPU_R;
        m2m_cl.dyn_modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
        m2m_cl.name = "m2m_cl";
        m2m_cl.dyn_modes[2] = STARPU_R;
        m2m_cl.dyn_modes[3] = STARPU_R;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
367

BRAMAS Berenger's avatar
BRAMAS Berenger committed
368
        memset(&l2l_cl, 0, sizeof(l2l_cl));
369
#ifdef STARPU_USE_CPU
BRAMAS Berenger's avatar
BRAMAS Berenger committed
370 371 372 373
        if(originalCpuKernel->supportL2L(FSTARPU_CPU_IDX)){
            l2l_cl.cpu_funcs[0] = StarPUCpuWrapperClass::downardPassCallback;
            l2l_cl.where |= STARPU_CPU;
        }
374
#endif
375
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
BRAMAS Berenger's avatar
BRAMAS Berenger committed
376 377 378 379
        if(originalCpuKernel->supportL2L(FSTARPU_CUDA_IDX)){
            l2l_cl.cuda_funcs[0] = StarPUCudaWrapperClass::downardPassCallback;
            l2l_cl.where |= STARPU_CUDA;
        }
380
#endif
381
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
BRAMAS Berenger's avatar
BRAMAS Berenger committed
382 383 384
        if(originalCpuKernel->supportL2L(FSTARPU_OPENCL_IDX)){
            l2l_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::downardPassCallback;
            l2l_cl.where |= STARPU_OPENCL;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
385
        }
BRAMAS Berenger's avatar
BRAMAS Berenger committed
386 387 388 389 390 391 392 393
#endif
        l2l_cl.nbuffers = 4;
        l2l_cl.dyn_modes = (starpu_data_access_mode*)malloc(l2l_cl.nbuffers*sizeof(starpu_data_access_mode));
        l2l_cl.dyn_modes[0] = STARPU_R;
        l2l_cl.dyn_modes[1] = STARPU_R;
        l2l_cl.name = "l2l_cl";
        l2l_cl.dyn_modes[2] = STARPU_R;
        l2l_cl.dyn_modes[3] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
394 395

        memset(&l2p_cl, 0, sizeof(l2p_cl));
396
#ifdef STARPU_USE_CPU
397
        if(originalCpuKernel->supportL2P(FSTARPU_CPU_IDX)){
398 399 400
            l2p_cl.cpu_funcs[0] = StarPUCpuWrapperClass::mergePassCallback;
            l2p_cl.where |= STARPU_CPU;
        }
401
#endif
402
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
403
        if(originalCpuKernel->supportL2P(FSTARPU_CUDA_IDX)){
404 405 406 407
            l2p_cl.cuda_funcs[0] = StarPUCudaWrapperClass::mergePassCallback;
            l2p_cl.where |= STARPU_CUDA;
        }
#endif
408
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
409
        if(originalCpuKernel->supportL2P(FSTARPU_OPENCL_IDX)){
410 411 412
            l2p_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::mergePassCallback;
            l2p_cl.where |= STARPU_OPENCL;
        }
413
#endif
414
        l2p_cl.nbuffers = 4;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
415
        l2p_cl.modes[0] = STARPU_R;
416 417
        l2p_cl.modes[1] = STARPU_R;
        l2p_cl.modes[2] = STARPU_R;
418
        l2p_cl.modes[3] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
419
        l2p_cl.name = "l2p_cl";
BRAMAS Berenger's avatar
BRAMAS Berenger committed
420 421

        memset(&p2p_cl_in, 0, sizeof(p2p_cl_in));
422
#ifdef STARPU_USE_CPU
423
        if(originalCpuKernel->supportP2P(FSTARPU_CPU_IDX)){
424 425 426
            p2p_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::directInPassCallback;
            p2p_cl_in.where |= STARPU_CPU;
        }
427
#endif
428
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
429
        if(originalCpuKernel->supportP2P(FSTARPU_CUDA_IDX)){
430 431 432 433
            p2p_cl_in.cuda_funcs[0] = StarPUCudaWrapperClass::directInPassCallback;
            p2p_cl_in.where |= STARPU_CUDA;
        }
#endif
434
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
435
        if(originalCpuKernel->supportP2P(FSTARPU_OPENCL_IDX)){
436 437 438
            p2p_cl_in.opencl_funcs[0] = StarPUOpenClWrapperClass::directInPassCallback;
            p2p_cl_in.where |= STARPU_OPENCL;
        }
439
#endif
440 441
        p2p_cl_in.nbuffers = 2;
        p2p_cl_in.modes[0] = STARPU_R;
442
        p2p_cl_in.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
443
        p2p_cl_in.name = "p2p_cl_in";
BRAMAS Berenger's avatar
BRAMAS Berenger committed
444
        memset(&p2p_cl_inout, 0, sizeof(p2p_cl_inout));
445
#ifdef STARPU_USE_CPU
446
        if(originalCpuKernel->supportP2PExtern(FSTARPU_CPU_IDX)){
447 448 449
            p2p_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::directInoutPassCallback;
            p2p_cl_inout.where |= STARPU_CPU;
        }
450
#endif
451
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
452
        if(originalCpuKernel->supportP2PExtern(FSTARPU_CUDA_IDX)){
453 454 455 456
            p2p_cl_inout.cuda_funcs[0] = StarPUCudaWrapperClass::directInoutPassCallback;
            p2p_cl_inout.where |= STARPU_CUDA;
        }
#endif
457
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
458
        if(originalCpuKernel->supportP2PExtern(FSTARPU_OPENCL_IDX)){
459 460 461
            p2p_cl_inout.opencl_funcs[0] = StarPUOpenClWrapperClass::directInoutPassCallback;
            p2p_cl_inout.where |= STARPU_OPENCL;
        }
462
#endif
463 464
        p2p_cl_inout.nbuffers = 4;
        p2p_cl_inout.modes[0] = STARPU_R;
465
        p2p_cl_inout.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
466
        p2p_cl_inout.modes[2] = STARPU_R;
467
        p2p_cl_inout.modes[3] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
468
        p2p_cl_inout.name = "p2p_cl_inout";
BRAMAS Berenger's avatar
BRAMAS Berenger committed
469 470

        memset(&m2l_cl_in, 0, sizeof(m2l_cl_in));
471
#ifdef STARPU_USE_CPU
472
        if(originalCpuKernel->supportM2L(FSTARPU_CPU_IDX)){
473 474 475
            m2l_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::transferInPassCallback;
            m2l_cl_in.where |= STARPU_CPU;
        }
476
#endif
477
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
478
        if(originalCpuKernel->supportM2L(FSTARPU_CUDA_IDX)){
479 480 481 482
            m2l_cl_in.cuda_funcs[0] = StarPUCudaWrapperClass::transferInPassCallback;
            m2l_cl_in.where |= STARPU_CUDA;
        }
#endif
483
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
484
        if(originalCpuKernel->supportM2L(FSTARPU_OPENCL_IDX)){
485 486 487
            m2l_cl_in.opencl_funcs[0] = StarPUOpenClWrapperClass::transferInPassCallback;
            m2l_cl_in.where |= STARPU_OPENCL;
        }
488
#endif
489 490
        m2l_cl_in.nbuffers = 3;
        m2l_cl_in.modes[0] = STARPU_R;
491
        m2l_cl_in.modes[1] = STARPU_R;
492
        m2l_cl_in.modes[2] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
493
        m2l_cl_in.name = "m2l_cl_in";
BRAMAS Berenger's avatar
BRAMAS Berenger committed
494

BRAMAS Berenger's avatar
BRAMAS Berenger committed
495
        memset(&m2l_cl_inout, 0, sizeof(m2l_cl_inout));
496
#ifdef STARPU_USE_CPU
497
        if(originalCpuKernel->supportM2LExtern(FSTARPU_CPU_IDX)){
498 499 500
            m2l_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::transferInoutPassCallback;
            m2l_cl_inout.where |= STARPU_CPU;
        }
501
#endif
502
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
503
        if(originalCpuKernel->supportM2LExtern(FSTARPU_CUDA_IDX)){
504 505 506 507
            m2l_cl_inout.cuda_funcs[0] = StarPUCudaWrapperClass::transferInoutPassCallback;
            m2l_cl_inout.where |= STARPU_CUDA;
        }
#endif
508
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
509
        if(originalCpuKernel->supportM2LExtern(FSTARPU_OPENCL_IDX)){
510 511 512
            m2l_cl_inout.opencl_funcs[0] = StarPUOpenClWrapperClass::transferInoutPassCallback;
            m2l_cl_inout.where |= STARPU_OPENCL;
        }
513
#endif
BRAMAS Berenger's avatar
BRAMAS Berenger committed
514
        m2l_cl_inout.nbuffers = 4;
515
        m2l_cl_inout.modes[0] = STARPU_R;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
516 517
        m2l_cl_inout.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
        m2l_cl_inout.modes[2] = STARPU_R;
518
        m2l_cl_inout.modes[3] = STARPU_R;
519
        m2l_cl_inout.name = "m2l_cl_inout";
BRAMAS Berenger's avatar
BRAMAS Berenger committed
520 521 522 523 524
    }

    /** dealloc in a starpu way all the defined handles */
    void cleanHandle(){
        for(int idxLevel = 0 ; idxLevel < tree->getHeight() ; ++idxLevel){
525 526 527 528
            for(int idxHandle = 0 ; idxHandle < int(cellHandles[idxLevel].size()) ; ++idxHandle){
                starpu_data_unregister(cellHandles[idxLevel][idxHandle].symb);
                starpu_data_unregister(cellHandles[idxLevel][idxHandle].up);
                starpu_data_unregister(cellHandles[idxLevel][idxHandle].down);
529
            }
530
            cellHandles[idxLevel].clear();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
531 532
        }
        {
533 534 535
            for(int idxHandle = 0 ; idxHandle < int(particleHandles.size()) ; ++idxHandle){
                starpu_data_unregister(particleHandles[idxHandle].symb);
                starpu_data_unregister(particleHandles[idxHandle].down);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
536
            }
537
            particleHandles.clear();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
538 539 540 541 542 543 544 545 546 547
        }
    }

    /** Reset the handles array and create new ones to define
     * in a starpu way each block of data
     */
    void buildHandles(){
        cleanHandle();

        for(int idxLevel = 2 ; idxLevel < tree->getHeight() ; ++idxLevel){
548
            cellHandles[idxLevel].resize(tree->getNbCellGroupAtLevel(idxLevel));
BRAMAS Berenger's avatar
BRAMAS Berenger committed
549 550 551

            for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
                const CellContainerClass* currentCells = tree->getCellGroup(idxLevel, idxGroup);
552
                starpu_variable_data_register(&cellHandles[idxLevel][idxGroup].symb, 0,
553
                                              (uintptr_t)currentCells->getRawBuffer(), currentCells->getBufferSizeInByte());
554 555 556 557
                starpu_variable_data_register(&cellHandles[idxLevel][idxGroup].up, 0,
                                              (uintptr_t)currentCells->getRawMultipoleBuffer(), currentCells->getMultipoleBufferSizeInByte());
                starpu_variable_data_register(&cellHandles[idxLevel][idxGroup].down, 0,
                                              (uintptr_t)currentCells->getRawLocalBuffer(), currentCells->getLocalBufferSizeInByte());
558
                cellHandles[idxLevel][idxGroup].intervalSize = int(currentCells->getNumberOfCellsInBlock());
559
#ifdef STARPU_SUPPORT_ARBITER
BRAMAS Berenger's avatar
BRAMAS Berenger committed
560 561
                starpu_data_assign_arbiter(cellHandles[idxLevel][idxGroup].up, arbiterGlobal);
                starpu_data_assign_arbiter(cellHandles[idxLevel][idxGroup].down, arbiterGlobal);
562
#endif
BRAMAS Berenger's avatar
BRAMAS Berenger committed
563 564 565
            }
        }
        {
566
            particleHandles.resize(tree->getNbParticleGroup());
BRAMAS Berenger's avatar
BRAMAS Berenger committed
567 568
            for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
                ParticleGroupClass* containers = tree->getParticleGroup(idxGroup);
569
                starpu_variable_data_register(&particleHandles[idxGroup].symb, 0,
570
                                              (uintptr_t)containers->getRawBuffer(), containers->getBufferSizeInByte());
571 572
                starpu_variable_data_register(&particleHandles[idxGroup].down, 0,
                                              (uintptr_t)containers->getRawAttributesBuffer(), containers->getAttributesBufferSizeInByte());
573
                particleHandles[idxGroup].intervalSize = int(containers->getNumberOfLeavesInBlock());
574
#ifdef STARPU_SUPPORT_ARBITER
BRAMAS Berenger's avatar
BRAMAS Berenger committed
575
                starpu_data_assign_arbiter(particleHandles[idxGroup].down, arbiterGlobal);
576
#endif
BRAMAS Berenger's avatar
BRAMAS Berenger committed
577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605
            }
        }
    }

    /**
     * This function is creating the interactions vector between blocks.
     * It fills externalInteractionsAllLevel and externalInteractionsLeafLevel.
     * Warning, the omp task for now are using the class attributes!
     *
     */
    void buildExternalInteractionVecs(){
        FLOG( FTic timer; FTic leafTimer; FTic cellTimer; );
        // Reset interactions
        externalInteractionsAllLevel.clear();
        externalInteractionsLeafLevel.clear();
        // One per level + leaf level
        externalInteractionsAllLevel.resize(tree->getHeight());

        // First leaf level
        {
            // We create one big vector per block
            externalInteractionsLeafLevel.resize(tree->getNbParticleGroup());

            for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
                // Create the vector
                ParticleGroupClass* containers = tree->getParticleGroup(idxGroup);

                std::vector<BlockInteractions<ParticleGroupClass>>* externalInteractions = &externalInteractionsLeafLevel[idxGroup];

606
#pragma omp task default(none) firstprivate(idxGroup, containers, externalInteractions)
BRAMAS Berenger's avatar
BRAMAS Berenger committed
607 608 609 610 611
                { // Can be a task(inout:iterCells)
                    std::vector<OutOfBlockInteraction> outsideInteractions;
                    const MortonIndex blockStartIdx = containers->getStartingIndex();
                    const MortonIndex blockEndIdx   = containers->getEndingIndex();

612 613 614
                    for(int leafIdx = 0 ; leafIdx < containers->getNumberOfLeavesInBlock() ; ++leafIdx){
                        const MortonIndex mindex = containers->getLeafMortonIndex(leafIdx);
                        // ParticleContainerClass particles = containers->template getLeaf<ParticleContainerClass>(leafIdx);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
615

616 617 618 619 620 621 622 623 624 625 626 627 628
                        MortonIndex interactionsIndexes[26];
                        int interactionsPosition[26];
                        FTreeCoordinate coord(mindex, tree->getHeight()-1);
                        int counter = coord.getNeighborsIndexes(tree->getHeight(),interactionsIndexes,interactionsPosition);

                        for(int idxInter = 0 ; idxInter < counter ; ++idxInter){
                            if( blockStartIdx <= interactionsIndexes[idxInter] && interactionsIndexes[idxInter] < blockEndIdx ){
                                // Inside block interaction, do nothing
                            }
                            else if(interactionsIndexes[idxInter] < mindex){
                                OutOfBlockInteraction property;
                                property.insideIndex = mindex;
                                property.outIndex    = interactionsIndexes[idxInter];
BRAMAS Berenger's avatar
BRAMAS Berenger committed
629
                                property.relativeOutPosition = interactionsPosition[idxInter];
630
                                property.insideIdxInBlock = leafIdx;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
631
                                property.outsideIdxInBlock = -1;
632
                                outsideInteractions.push_back(property);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
633 634 635 636 637 638 639 640 641 642
                            }
                        }
                    }

                    // Sort to match external order
                    FQuickSort<OutOfBlockInteraction, int>::QsSequential(outsideInteractions.data(),int(outsideInteractions.size()));

                    int currentOutInteraction = 0;
                    for(int idxLeftGroup = 0 ; idxLeftGroup < idxGroup && currentOutInteraction < int(outsideInteractions.size()) ; ++idxLeftGroup){
                        ParticleGroupClass* leftContainers = tree->getParticleGroup(idxLeftGroup);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
643 644
                        const MortonIndex blockStartIdxOther    = leftContainers->getStartingIndex();
                        const MortonIndex blockEndIdxOther      = leftContainers->getEndingIndex();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
645

BRAMAS Berenger's avatar
BRAMAS Berenger committed
646 647 648 649
                        while(currentOutInteraction < int(outsideInteractions.size())
                              && (outsideInteractions[currentOutInteraction].outIndex < blockStartIdxOther
                                  || leftContainers->getLeafIndex(outsideInteractions[currentOutInteraction].outIndex) == -1)
                              && outsideInteractions[currentOutInteraction].outIndex < blockEndIdxOther){
BRAMAS Berenger's avatar
BRAMAS Berenger committed
650 651 652 653
                            currentOutInteraction += 1;
                        }

                        int lastOutInteraction = currentOutInteraction;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
654
                        int copyExistingInteraction = currentOutInteraction;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
655
                        while(lastOutInteraction < int(outsideInteractions.size()) && outsideInteractions[lastOutInteraction].outIndex < blockEndIdxOther){
BRAMAS Berenger's avatar
BRAMAS Berenger committed
656 657 658 659 660 661 662 663
                            const int leafPos = leftContainers->getLeafIndex(outsideInteractions[lastOutInteraction].outIndex);
                            if(leafPos != -1){
                                if(copyExistingInteraction != lastOutInteraction){
                                    outsideInteractions[copyExistingInteraction] = outsideInteractions[lastOutInteraction];
                                }
                                outsideInteractions[copyExistingInteraction].outsideIdxInBlock = leafPos;
                                copyExistingInteraction += 1;
                            }
BRAMAS Berenger's avatar
BRAMAS Berenger committed
664 665 666
                            lastOutInteraction += 1;
                        }

BRAMAS Berenger's avatar
BRAMAS Berenger committed
667
                        const int nbInteractionsBetweenBlocks = (copyExistingInteraction-currentOutInteraction);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
668 669 670 671 672 673 674
                        if(nbInteractionsBetweenBlocks){
                            externalInteractions->emplace_back();
                            BlockInteractions<ParticleGroupClass>* interactions = &externalInteractions->back();
                            interactions->otherBlock = leftContainers;
                            interactions->otherBlockId = idxLeftGroup;
                            interactions->interactions.resize(nbInteractionsBetweenBlocks);
                            std::copy(outsideInteractions.begin() + currentOutInteraction,
BRAMAS Berenger's avatar
BRAMAS Berenger committed
675
                                      outsideInteractions.begin() + copyExistingInteraction,
BRAMAS Berenger's avatar
BRAMAS Berenger committed
676 677 678 679 680 681 682 683 684 685 686 687 688 689 690
                                      interactions->interactions.begin());
                        }

                        currentOutInteraction = lastOutInteraction;
                    }
                }
            }
        }
        FLOG( leafTimer.tac(); );
        FLOG( cellTimer.tic(); );
        {
            for(int idxLevel = tree->getHeight()-1 ; idxLevel >= 2 ; --idxLevel){
                externalInteractionsAllLevel[idxLevel].resize(tree->getNbCellGroupAtLevel(idxLevel));

                for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
691
                    CellContainerClass* currentCells = tree->getCellGroup(idxLevel, idxGroup);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
692 693 694

                    std::vector<BlockInteractions<CellContainerClass>>* externalInteractions = &externalInteractionsAllLevel[idxLevel][idxGroup];

695
#pragma omp task default(none) firstprivate(idxGroup, currentCells, idxLevel, externalInteractions)
BRAMAS Berenger's avatar
BRAMAS Berenger committed
696 697 698 699 700
                    {
                        std::vector<OutOfBlockInteraction> outsideInteractions;
                        const MortonIndex blockStartIdx = currentCells->getStartingIndex();
                        const MortonIndex blockEndIdx   = currentCells->getEndingIndex();

701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716
                        for(int cellIdx = 0 ; cellIdx < currentCells->getNumberOfCellsInBlock() ; ++cellIdx){
                            const MortonIndex mindex = currentCells->getCellMortonIndex(cellIdx);

                            MortonIndex interactionsIndexes[189];
                            int interactionsPosition[189];
                            const FTreeCoordinate coord(mindex, idxLevel);
                            int counter = coord.getInteractionNeighbors(idxLevel,interactionsIndexes,interactionsPosition);

                            for(int idxInter = 0 ; idxInter < counter ; ++idxInter){
                                if( blockStartIdx <= interactionsIndexes[idxInter] && interactionsIndexes[idxInter] < blockEndIdx ){
                                    // Nothing to do
                                }
                                else if(interactionsIndexes[idxInter] < mindex){
                                    OutOfBlockInteraction property;
                                    property.insideIndex = mindex;
                                    property.outIndex    = interactionsIndexes[idxInter];
BRAMAS Berenger's avatar
BRAMAS Berenger committed
717
                                    property.relativeOutPosition = interactionsPosition[idxInter];
718
                                    property.insideIdxInBlock = cellIdx;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
719
                                    property.outsideIdxInBlock = -1;
720
                                    outsideInteractions.push_back(property);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
721 722 723 724 725 726 727 728 729 730
                                }
                            }
                        }

                        // Manage outofblock interaction
                        FQuickSort<OutOfBlockInteraction, int>::QsSequential(outsideInteractions.data(),int(outsideInteractions.size()));

                        int currentOutInteraction = 0;
                        for(int idxLeftGroup = 0 ; idxLeftGroup < idxGroup && currentOutInteraction < int(outsideInteractions.size()) ; ++idxLeftGroup){
                            CellContainerClass* leftCells   = tree->getCellGroup(idxLevel, idxLeftGroup);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
731 732
                            const MortonIndex blockStartIdxOther = leftCells->getStartingIndex();
                            const MortonIndex blockEndIdxOther   = leftCells->getEndingIndex();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
733

BRAMAS Berenger's avatar
BRAMAS Berenger committed
734 735 736 737
                            while(currentOutInteraction < int(outsideInteractions.size())
                                  && (outsideInteractions[currentOutInteraction].outIndex < blockStartIdxOther
                                      || leftCells->getCellIndex(outsideInteractions[currentOutInteraction].outIndex) == -1)
                                  && outsideInteractions[currentOutInteraction].outIndex < blockEndIdxOther){
BRAMAS Berenger's avatar
BRAMAS Berenger committed
738 739 740 741
                                currentOutInteraction += 1;
                            }

                            int lastOutInteraction = currentOutInteraction;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
742
                            int copyExistingInteraction = currentOutInteraction;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
743
                            while(lastOutInteraction < int(outsideInteractions.size()) && outsideInteractions[lastOutInteraction].outIndex < blockEndIdxOther){
BRAMAS Berenger's avatar
BRAMAS Berenger committed
744 745 746 747 748 749 750 751
                                const int cellPos = leftCells->getCellIndex(outsideInteractions[lastOutInteraction].outIndex);
                                if(cellPos != -1){
                                    if(copyExistingInteraction != lastOutInteraction){
                                        outsideInteractions[copyExistingInteraction] = outsideInteractions[lastOutInteraction];
                                    }
                                    outsideInteractions[copyExistingInteraction].outsideIdxInBlock = cellPos;
                                    copyExistingInteraction += 1;
                                }
BRAMAS Berenger's avatar
BRAMAS Berenger committed
752 753 754 755
                                lastOutInteraction += 1;
                            }

                            // Create interactions
BRAMAS Berenger's avatar
BRAMAS Berenger committed
756
                            const int nbInteractionsBetweenBlocks = (copyExistingInteraction-currentOutInteraction);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
757 758 759 760 761 762 763
                            if(nbInteractionsBetweenBlocks){
                                externalInteractions->emplace_back();
                                BlockInteractions<CellContainerClass>* interactions = &externalInteractions->back();
                                interactions->otherBlock = leftCells;
                                interactions->otherBlockId = idxLeftGroup;
                                interactions->interactions.resize(nbInteractionsBetweenBlocks);
                                std::copy(outsideInteractions.begin() + currentOutInteraction,
BRAMAS Berenger's avatar
BRAMAS Berenger committed
764
                                          outsideInteractions.begin() + copyExistingInteraction,
BRAMAS Berenger's avatar
BRAMAS Berenger committed
765 766 767 768 769 770 771 772 773 774 775
                                          interactions->interactions.begin());
                            }

                            currentOutInteraction = lastOutInteraction;
                        }
                    }
                }
            }
        }
        FLOG( cellTimer.tac(); );

776
#pragma omp taskwait
BRAMAS Berenger's avatar
BRAMAS Berenger committed
777 778 779 780 781 782 783 784 785 786 787 788 789

        FLOG( FLog::Controller << "\t\t Prepare in " << timer.tacAndElapsed() << "s\n" );
        FLOG( FLog::Controller << "\t\t\t Prepare at leaf level in   " << leafTimer.elapsed() << "s\n" );
        FLOG( FLog::Controller << "\t\t\t Prepare at other levels in " << cellTimer.elapsed() << "s\n" );
    }

    /////////////////////////////////////////////////////////////////////////////////////
    /// Bottom Pass
    /////////////////////////////////////////////////////////////////////////////////////

    void bottomPass(){
        FLOG( FTic timer; );

BRAMAS Berenger's avatar
BRAMAS Berenger committed
790 791
        FAssertLF(cellHandles[tree->getHeight()-1].size() == particleHandles.size());

BRAMAS Berenger's avatar
BRAMAS Berenger committed
792 793
        for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
            starpu_insert_task(&p2m_cl,
794 795
                               STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
                               STARPU_VALUE, &cellHandles[tree->getHeight()-1][idxGroup].intervalSize, sizeof(int),
796
                    STARPU_PRIORITY, FStarPUFmmPriorities::Controller().getInsertionPosP2M(),
797 798 799
                    STARPU_R, cellHandles[tree->getHeight()-1][idxGroup].symb,
                    STARPU_RW, cellHandles[tree->getHeight()-1][idxGroup].up,
                    STARPU_R, particleHandles[idxGroup].symb,
800
#ifdef STARPU_USE_TASK_NAME
801
                    STARPU_NAME, p2mTaskNames.get(),
802
#endif
BRAMAS Berenger's avatar
BRAMAS Berenger committed
803 804 805 806 807 808 809 810 811 812 813 814
                    0);
        }

        FLOG( FLog::Controller << "\t\t bottomPass in " << timer.tacAndElapsed() << "s\n" );
    }

    /////////////////////////////////////////////////////////////////////////////////////
    /// Upward Pass
    /////////////////////////////////////////////////////////////////////////////////////

    void upwardPass(){
        FLOG( FTic timer; );
815
        for(int idxLevel = FMath::Min(tree->getHeight() - 2, FAbstractAlgorithm::lowerWorkingLevel - 1) ; idxLevel >= FAbstractAlgorithm::upperWorkingLevel ; --idxLevel){
BRAMAS Berenger's avatar
BRAMAS Berenger committed
816 817 818 819 820 821 822 823 824 825 826
            int idxSubGroup = 0;

            for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
                CellContainerClass*const currentCells = tree->getCellGroup(idxLevel, idxGroup);

                // Skip current group if needed
                if( tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex() <= (currentCells->getStartingIndex()<<3) ){
                    ++idxSubGroup;
                    FAssertLF( idxSubGroup != tree->getNbCellGroupAtLevel(idxLevel+1) );
                    FAssertLF( (tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex()>>3) == currentCells->getStartingIndex() );
                }
BRAMAS Berenger's avatar
BRAMAS Berenger committed
827

BRAMAS Berenger's avatar
BRAMAS Berenger committed
828
                // Copy at max 8 groups
BRAMAS Berenger's avatar
BRAMAS Berenger committed
829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852
                {

                    struct starpu_task* const task = starpu_task_create();
                    task->dyn_handles = (starpu_data_handle_t*)malloc(sizeof(starpu_data_handle_t)*20);
                    task->dyn_handles[0] = cellHandles[idxLevel][idxGroup].symb;
                    task->dyn_handles[1] = cellHandles[idxLevel][idxGroup].up;

                    task->dyn_handles[2] = cellHandles[idxLevel+1][idxSubGroup].symb;
                    task->dyn_handles[3] = cellHandles[idxLevel+1][idxSubGroup].up;

                    // put the right codelet
                    task->cl = &m2m_cl;
                    // put args values
                    char *arg_buffer;
                    size_t arg_buffer_size;
                    starpu_codelet_pack_args((void**)&arg_buffer, &arg_buffer_size,
                                             STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
                                             STARPU_VALUE, &idxLevel, sizeof(idxLevel),
                                             STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
                                             0);
                    task->cl_arg = arg_buffer;
                    task->cl_arg_size = arg_buffer_size;
                    task->priority = FStarPUFmmPriorities::Controller().getInsertionPosM2M(idxLevel);
    #ifdef STARPU_USE_TASK_NAME
853
                    task->name = m2mTaskNames[idxLevel].get();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
854 855 856
    #endif
                    FAssertLF(starpu_task_submit(task) == 0);
                }
BRAMAS Berenger's avatar
BRAMAS Berenger committed
857 858

                while(tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex() <= (((currentCells->getEndingIndex()-1)<<3)+7)
859
                      && (idxSubGroup+1) != tree->getNbCellGroupAtLevel(idxLevel+1)
BRAMAS Berenger's avatar
BRAMAS Berenger committed
860
                      && tree->getCellGroup(idxLevel+1, idxSubGroup+1)->getStartingIndex() <= ((currentCells->getEndingIndex()-1)<<3)+7 ){
861
                    idxSubGroup += 1;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884

                    struct starpu_task* const task = starpu_task_create();
                    task->dyn_handles = (starpu_data_handle_t*)malloc(sizeof(starpu_data_handle_t)*20);
                    task->dyn_handles[0] = cellHandles[idxLevel][idxGroup].symb;
                    task->dyn_handles[1] = cellHandles[idxLevel][idxGroup].up;

                    task->dyn_handles[2] = cellHandles[idxLevel+1][idxSubGroup].symb;
                    task->dyn_handles[3] = cellHandles[idxLevel+1][idxSubGroup].up;

                    // put the right codelet
                    task->cl = &m2m_cl;
                    // put args values
                    char *arg_buffer;
                    size_t arg_buffer_size;
                    starpu_codelet_pack_args((void**)&arg_buffer, &arg_buffer_size,
                                             STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
                                             STARPU_VALUE, &idxLevel, sizeof(idxLevel),
                                             STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
                                             0);
                    task->cl_arg = arg_buffer;
                    task->cl_arg_size = arg_buffer_size;
                    task->priority = FStarPUFmmPriorities::Controller().getInsertionPosM2M(idxLevel);
    #ifdef STARPU_USE_TASK_NAME
885
                    task->name = m2mTaskNames[idxLevel].get();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
886 887
    #endif
                    FAssertLF(starpu_task_submit(task) == 0);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
888 889 890 891 892 893 894 895 896 897 898 899 900 901
                }

            }
        }
        FLOG( FLog::Controller << "\t\t upwardPass in " << timer.tacAndElapsed() << "s\n" );
    }

    /////////////////////////////////////////////////////////////////////////////////////
    /// Transfer Pass
    /////////////////////////////////////////////////////////////////////////////////////

    void transferPass(){
        FLOG( FTic timer; );
        FLOG( FTic timerInBlock; FTic timerOutBlock; );
902
        for(int idxLevel = FAbstractAlgorithm::lowerWorkingLevel-1 ; idxLevel >= FAbstractAlgorithm::upperWorkingLevel ; --idxLevel){
BRAMAS Berenger's avatar
BRAMAS Berenger committed
903 904 905
            FLOG( timerInBlock.tic() );
            for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
                starpu_insert_task(&m2l_cl_in,
906 907 908
                                   STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
                                   STARPU_VALUE, &idxLevel, sizeof(idxLevel),
                                   STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
909
                                   STARPU_PRIORITY, FStarPUFmmPriorities::Controller().getInsertionPosM2L(idxLevel),
910 911
                                   STARPU_R, cellHandles[idxLevel][idxGroup].symb,
                                   STARPU_R, cellHandles[idxLevel][idxGroup].up,
912
                                   (STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][idxGroup].down,
913
                   #ifdef STARPU_USE_TASK_NAME
914
                                       STARPU_NAME, m2lTaskNames[idxLevel].get(),
915
                   #endif
916
                                   0);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
917 918 919 920 921 922 923 924 925
            }
            FLOG( timerInBlock.tac() );

            FLOG( timerOutBlock.tic() );
            for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
                for(int idxInteraction = 0; idxInteraction < int(externalInteractionsAllLevel[idxLevel][idxGroup].size()) ; ++idxInteraction){
                    const int interactionid = externalInteractionsAllLevel[idxLevel][idxGroup][idxInteraction].otherBlockId;
                    const std::vector<OutOfBlockInteraction>* outsideInteractions = &externalInteractionsAllLevel[idxLevel][idxGroup][idxInteraction].interactions;

BRAMAS Berenger's avatar
BRAMAS Berenger committed
926
                    int mode = 1;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
927
                    starpu_insert_task(&m2l_cl_inout,
928 929 930 931
                                       STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
                                       STARPU_VALUE, &idxLevel, sizeof(idxLevel),
                                       STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions),
                                       STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
BRAMAS Berenger's avatar
BRAMAS Berenger committed
932
                                       STARPU_VALUE, &mode, sizeof(int),
933
                                       STARPU_PRIORITY, FStarPUFmmPriorities::Controller().getInsertionPosM2LExtern(idxLevel),
934 935 936 937
                                       STARPU_R, cellHandles[idxLevel][idxGroup].symb,
                                       (STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][idxGroup].down,
                                       STARPU_R, cellHandles[idxLevel][interactionid].symb,
                                       STARPU_R, cellHandles[idxLevel][interactionid].up,
BRAMAS Berenger's avatar
BRAMAS Berenger committed
938
                   #ifdef STARPU_USE_TASK_NAME
939
                                       STARPU_NAME, m2lOuterTaskNames[idxLevel].get(),
BRAMAS Berenger's avatar
BRAMAS Berenger committed
940 941 942 943 944 945 946 947 948 949 950 951
                   #endif
                                       0);

                    mode = 2;
                    starpu_insert_task(&m2l_cl_inout,
                                       STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
                                       STARPU_VALUE, &idxLevel, sizeof(idxLevel),
                                       STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions),
                                       STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
                                       STARPU_VALUE, &mode, sizeof(int),
                                       STARPU_PRIORITY, FStarPUFmmPriorities::Controller().getInsertionPosM2LExtern(idxLevel),
                                       STARPU_R, cellHandles[idxLevel][interactionid].symb,
952
                                       (STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][interactionid].down,
BRAMAS Berenger's avatar
BRAMAS Berenger committed
953 954
                                       STARPU_R, cellHandles[idxLevel][idxGroup].symb,
                                       STARPU_R, cellHandles[idxLevel][idxGroup].up,
955
                   #ifdef STARPU_USE_TASK_NAME
956
                                       STARPU_NAME, m2lOuterTaskNames[idxLevel].get(),
957
                   #endif
958
                                       0);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
959 960 961 962 963 964 965 966 967 968 969 970 971 972 973
                }
            }
            FLOG( timerOutBlock.tac() );
        }
        FLOG( FLog::Controller << "\t\t transferPass in " << timer.tacAndElapsed() << "s\n" );
        FLOG( FLog::Controller << "\t\t\t inblock in  " << timerInBlock.elapsed() << "s\n" );
        FLOG( FLog::Controller << "\t\t\t outblock in " << timerOutBlock.elapsed() << "s\n" );
    }

    /////////////////////////////////////////////////////////////////////////////////////
    /// Downard Pass
    /////////////////////////////////////////////////////////////////////////////////////

    void downardPass(){
        FLOG(