// Keep in private GIT
#ifndef FGROUPTASKSTARPUALGORITHM_HPP
#define FGROUPTASKSTARPUALGORITHM_HPP

5 6 7 8 9 10 11
#include "../../Utils/FGlobal.hpp"
#include "../../Core/FCoreCommon.hpp"
#include "../../Utils/FQuickSort.hpp"
#include "../../Containers/FTreeCoordinate.hpp"
#include "../../Utils/FLog.hpp"
#include "../../Utils/FTic.hpp"
#include "../../Utils/FAssert.hpp"
12
#include "../../Utils/FEnv.hpp"
BRAMAS Berenger's avatar
BRAMAS Berenger committed
13

14 15
#include "FOutOfBlockInteraction.hpp"

BRAMAS Berenger's avatar
BRAMAS Berenger committed
16
#include <vector>
17
#include <memory>
BRAMAS Berenger's avatar
BRAMAS Berenger committed
18 19 20 21

#include <omp.h>

#include <starpu.h>
22
#include "../StarPUUtils/FStarPUUtils.hpp"
23
#include "../StarPUUtils/FStarPUFmmPriorities.hpp"
24
#include "../StarPUUtils/FStarPUFmmPrioritiesV2.hpp"
25
#include "../StarPUUtils/FStarPUReduxCpu.hpp"
BRAMAS Berenger's avatar
BRAMAS Berenger committed
26

27
#ifdef STARPU_USE_CPU
28
#include "../StarPUUtils/FStarPUCpuWrapper.hpp"
29
#endif
30
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
31 32 33 34 35
#include "../StarPUUtils/FStarPUCudaWrapper.hpp"
#include "../Cuda/FCudaEmptyKernel.hpp"
#include "../Cuda/FCudaGroupAttachedLeaf.hpp"
#include "../Cuda/FCudaGroupOfParticles.hpp"
#include "../Cuda/FCudaGroupOfCells.hpp"
36
#include "../Cuda/FCudaEmptyCellSymb.hpp"
37
#endif
38
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
39 40
#include "../StarPUUtils/FStarPUOpenClWrapper.hpp"
#include "../OpenCl/FOpenCLDeviceWrapper.hpp"
41 42
#endif

43 44 45
#ifdef SCALFMM_SIMGRID_TASKNAMEPARAMS
#include "../StarPUUtils/FStarPUTaskNameParams.hpp"
#endif
46

BRAMAS Berenger's avatar
BRAMAS Berenger committed
47 48
#include "Containers/FBoolArray.hpp"

49
template <class OctreeClass, class CellContainerClass, class KernelClass, class ParticleGroupClass, class StarPUCpuWrapperClass
50 51 52 53 54 55 56
          #ifdef SCALFMM_ENABLE_CUDA_KERNEL
          , class StarPUCudaWrapperClass = FStarPUCudaWrapper<KernelClass, FCudaEmptyCellSymb, int, int, FCudaGroupOfCells<FCudaEmptyCellSymb, int, int>,
                                                              FCudaGroupOfParticles<int, 0, 0, int>, FCudaGroupAttachedLeaf<int, 0, 0, int>, FCudaEmptyKernel<int> >
          #endif
          #ifdef SCALFMM_ENABLE_OPENCL_KERNEL
          , class StarPUOpenClWrapperClass = FStarPUOpenClWrapper<KernelClass, FOpenCLDeviceWrapper<KernelClass>>
          #endif
57
          >
58
class FGroupTaskStarPUAlgorithm : public FAbstractAlgorithm {
BRAMAS Berenger's avatar
BRAMAS Berenger committed
59
protected:
60
    typedef FGroupTaskStarPUAlgorithm<OctreeClass, CellContainerClass, KernelClass, ParticleGroupClass, StarPUCpuWrapperClass
61
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
62
    , StarPUCudaWrapperClass
63
#endif
64
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
65
    , StarPUOpenClWrapperClass
66 67
#endif
    > ThisClass;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
68 69 70 71 72 73 74 75

    /**
      * Groups a list of out-of-block interactions together with the block
      * they target. Instances are stored per level (cells) and for the
      * leaf level (particles) in the externalInteractions* members.
      */
    template <class OtherBlockClass>
    struct BlockInteractions{
        OtherBlockClass* otherBlock;   //< The other block concerned by the interactions (this struct never frees it)
        int otherBlockId;              //< Identifier of otherBlock - presumably its index in the tree's group list, TODO confirm
        std::vector<OutOfBlockInteraction> interactions; //< The interactions with otherBlock
    };

76 77 78 79
    /**
      * StarPU data handles attached to one group of cells.
      * One instance is stored per cell group and per level in cellHandles.
      */
    struct CellHandles{
        starpu_data_handle_t symb;  //< Handle read (STARPU_R) by every codelet that uses the group
        starpu_data_handle_t up;    //< Handle of the "up" part of the cells (presumably multipoles - TODO confirm)
        starpu_data_handle_t down;  //< Handle of the "down" part of the cells (presumably locals - TODO confirm)
        int intervalSize;           //< Size of the interval covered by the group (presumably in Morton indexes - TODO confirm)
    };

    /**
      * StarPU data handles attached to one group of particles.
      * One instance is stored per particle group in particleHandles.
      */
    struct ParticleHandles{
        starpu_data_handle_t symb;  //< Handle read (STARPU_R) by the particle codelets
        starpu_data_handle_t down;  //< Handle updated by the particle codelets (RW/commute or redux access)
        int intervalSize;           //< Size of the interval covered by the group (presumably in Morton indexes - TODO confirm)
    };

BRAMAS Berenger's avatar
BRAMAS Berenger committed
89 90 91 92
    std::vector< std::vector< std::vector<BlockInteractions<CellContainerClass>>>> externalInteractionsAllLevel;
    std::vector< std::vector<BlockInteractions<ParticleGroupClass>>> externalInteractionsLeafLevel;

    OctreeClass*const tree;       //< The Tree
93
    KernelClass*const originalCpuKernel;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
94

95 96
    std::vector<CellHandles>* cellHandles;
    std::vector<ParticleHandles> particleHandles;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
97 98

    starpu_codelet p2m_cl;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
99 100
    starpu_codelet m2m_cl;
    starpu_codelet l2l_cl;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
101
    starpu_codelet l2l_cl_nocommute;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
102 103 104 105 106 107 108 109
    starpu_codelet l2p_cl;

    starpu_codelet m2l_cl_in;
    starpu_codelet m2l_cl_inout;

    starpu_codelet p2p_cl_in;
    starpu_codelet p2p_cl_inout;

110 111 112 113 114 115
#ifdef STARPU_USE_REDUX
    starpu_codelet p2p_redux_init;
    starpu_codelet p2p_redux_perform;
    starpu_codelet p2p_redux_read;
#endif

116
    const bool noCommuteAtLastLevel;
117
    const bool noCommuteBetweenLevel;
118

BRAMAS Berenger's avatar
BRAMAS Berenger committed
119
#ifdef STARPU_USE_CPU
120
    StarPUCpuWrapperClass cpuWrapper;
121
#endif
122
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
123 124
    StarPUCudaWrapperClass cudaWrapper;
#endif
125
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
126 127
    StarPUOpenClWrapperClass openclWrapper;
#endif
128 129 130

    FStarPUPtrInterface wrappers;
    FStarPUPtrInterface* wrapperptr;
131

132
#ifdef STARPU_SUPPORT_ARBITER
BRAMAS Berenger's avatar
BRAMAS Berenger committed
133
    starpu_arbiter_t arbiterGlobal;
134
#endif
135 136

#ifdef STARPU_USE_TASK_NAME
137
#ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
138 139 140 141 142 143 144 145
    std::vector<std::unique_ptr<char[]>> m2mTaskNames;
    std::vector<std::unique_ptr<char[]>> m2lTaskNames;
    std::vector<std::unique_ptr<char[]>> m2lOuterTaskNames;
    std::vector<std::unique_ptr<char[]>> l2lTaskNames;
    std::unique_ptr<char[]> p2mTaskNames;
    std::unique_ptr<char[]> l2pTaskNames;
    std::unique_ptr<char[]> p2pTaskNames;
    std::unique_ptr<char[]> p2pOuterTaskNames;
146 147 148
#else
    FStarPUTaskNameParams taskNames;
#endif
149
#endif
150 151 152
#ifdef SCALFMM_STARPU_USE_PRIO
    typedef FStarPUFmmPrioritiesV2 PrioClass;// FStarPUFmmPriorities
#endif
153

BRAMAS Berenger's avatar
BRAMAS Berenger committed
154
public:
155 156
    /**
      * Starts StarPU, creates one kernel per worker, then builds the codelets,
      * the interaction lists, the data handles and the task names.
      * The StarPU workers are left paused when the constructor returns.
      *
      * @param inTree    the tree to work on (must not be null)
      * @param inKernels the kernel handed to each wrapper's initKernel() for every worker
      *                  (must not be null)
      *
      * NOTE(review): the wrappers are built from tree->getHeight() in the
      * initializer list, i.e. before the null assertion on the tree is reached.
      */
    FGroupTaskStarPUAlgorithm(OctreeClass*const inTree, KernelClass* inKernels)
        : tree(inTree), originalCpuKernel(inKernels),
          cellHandles(nullptr),
          noCommuteAtLastLevel(FEnv::GetBool("SCALFMM_NO_COMMUTE_LAST_L2L", true)),
          noCommuteBetweenLevel(FEnv::GetBool("SCALFMM_NO_COMMUTE_M2L_L2L", false)),
      #ifdef STARPU_USE_CPU
          cpuWrapper(tree->getHeight()),
      #endif
      #ifdef SCALFMM_ENABLE_CUDA_KERNEL
          cudaWrapper(tree->getHeight()),
      #endif
      #ifdef SCALFMM_ENABLE_OPENCL_KERNEL
          openclWrapper(tree->getHeight()),
      #endif
          wrapperptr(&wrappers){
        FAssertLF(tree, "tree cannot be null");
        FAssertLF(inKernels, "kernels cannot be null");

        FAbstractAlgorithm::setNbLevelsInTree(tree->getHeight());

        // The StarPU calls are made outside of the assertion macro so that
        // they are executed even if the assertions are compiled out.
        struct starpu_conf conf;
        const int confInitStatus = starpu_conf_init(&conf);
        FAssertLF(confInitStatus == 0);
        (void)confInitStatus;
#ifdef SCALFMM_STARPU_USE_PRIO
        PrioClass::Controller().init(&conf, tree->getHeight(), inKernels);
#endif
        const int starpuInitStatus = starpu_init(&conf);
        FAssertLF(starpuInitStatus == 0);
        (void)starpuInitStatus;

        starpu_malloc_set_align(32);

        // The kernels are created by the workers themselves, one at a time
        starpu_pthread_mutex_t initMutex;
        starpu_pthread_mutex_init(&initMutex, nullptr);
#ifdef STARPU_USE_CPU
        FStarPUUtils::ExecOnWorkers(STARPU_CPU, [&](){
            starpu_pthread_mutex_lock(&initMutex);
            cpuWrapper.initKernel(starpu_worker_get_id(), inKernels);
            starpu_pthread_mutex_unlock(&initMutex);
        });
        wrappers.set(FSTARPU_CPU_IDX, &cpuWrapper);
#endif
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
        FStarPUUtils::ExecOnWorkers(STARPU_CUDA, [&](){
            starpu_pthread_mutex_lock(&initMutex);
            cudaWrapper.initKernel(starpu_worker_get_id(), inKernels);
            starpu_pthread_mutex_unlock(&initMutex);
        });
        wrappers.set(FSTARPU_CUDA_IDX, &cudaWrapper);
#endif
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
        FStarPUUtils::ExecOnWorkers(STARPU_OPENCL, [&](){
            starpu_pthread_mutex_lock(&initMutex);
            openclWrapper.initKernel(starpu_worker_get_id(), inKernels);
            starpu_pthread_mutex_unlock(&initMutex);
        });
        wrappers.set(FSTARPU_OPENCL_IDX, &openclWrapper);
#endif
        starpu_pthread_mutex_destroy(&initMutex);

        // Workers stay paused until executeCore() (or a forEach*Worker call) resumes them
        starpu_pause();

        // One vector of handles per level, released in the destructor
        cellHandles   = new std::vector<CellHandles>[tree->getHeight()];

#ifdef STARPU_SUPPORT_ARBITER
        arbiterGlobal = starpu_arbiter_create();
#endif

        initCodelet();
        rebuildInteractions();

        FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max Worker " << starpu_worker_get_count() << ")\n");
#ifdef STARPU_USE_CPU
        FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max CPU " << starpu_cpu_worker_get_count() << ")\n");
#endif
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
        FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max OpenCL " << starpu_opencl_worker_get_count() << ")\n");
#endif
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
        FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max CUDA " << starpu_cuda_worker_get_count() << ")\n");
#endif
        FLOG(FLog::Controller << "SCALFMM_NO_COMMUTE_LAST_L2L " << noCommuteAtLastLevel << "\n");
        FLOG(FLog::Controller << "SCALFMM_NO_COMMUTE_M2L_L2L " << noCommuteBetweenLevel << "\n");

        buildTaskNames();
    }

    /**
      * Fills the task name buffers: one name per level for M2M, M2L,
      * M2L-out and L2L, and a single name for P2M, L2P, P2P and P2P-out.
      * Does nothing unless STARPU_USE_TASK_NAME is defined and
      * SCALFMM_SIMGRID_TASKNAMEPARAMS is not.
      */
    void buildTaskNames(){
#ifdef STARPU_USE_TASK_NAME
#ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
        const int namesLength = 128;
        const int nbLevels = tree->getHeight();

        m2mTaskNames.resize(nbLevels);
        m2lTaskNames.resize(nbLevels);
        m2lOuterTaskNames.resize(nbLevels);
        l2lTaskNames.resize(nbLevels);

        // Per level names, the level is part of the name
        for(int idxLevel = 0 ; idxLevel < nbLevels ; ++idxLevel){
            m2mTaskNames[idxLevel].reset(new char[namesLength]);
            snprintf(m2mTaskNames[idxLevel].get(), namesLength, "M2M-level-%d", idxLevel);

            m2lTaskNames[idxLevel].reset(new char[namesLength]);
            snprintf(m2lTaskNames[idxLevel].get(), namesLength, "M2L-level-%d", idxLevel);

            m2lOuterTaskNames[idxLevel].reset(new char[namesLength]);
            snprintf(m2lOuterTaskNames[idxLevel].get(), namesLength, "M2L-out-level-%d", idxLevel);

            l2lTaskNames[idxLevel].reset(new char[namesLength]);
            snprintf(l2lTaskNames[idxLevel].get(), namesLength, "L2L-level-%d", idxLevel);
        }

        // Names of the operators that are not attached to a level
        p2mTaskNames.reset(new char[namesLength]);
        snprintf(p2mTaskNames.get(), namesLength, "P2M");

        l2pTaskNames.reset(new char[namesLength]);
        snprintf(l2pTaskNames.get(), namesLength, "L2P");

        p2pTaskNames.reset(new char[namesLength]);
        snprintf(p2pTaskNames.get(), namesLength, "P2P");

        p2pOuterTaskNames.reset(new char[namesLength]);
        snprintf(p2pOuterTaskNames.get(), namesLength, "P2P-out");
#endif
#endif
    }

270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290
    void syncData(){
        for(int idxLevel = 0 ; idxLevel < tree->getHeight() ; ++idxLevel){
            for(int idxHandle = 0 ; idxHandle < int(cellHandles[idxLevel].size()) ; ++idxHandle){
                starpu_data_acquire(cellHandles[idxLevel][idxHandle].symb, STARPU_R);
                starpu_data_release(cellHandles[idxLevel][idxHandle].symb);
                starpu_data_acquire(cellHandles[idxLevel][idxHandle].up, STARPU_R);
                starpu_data_release(cellHandles[idxLevel][idxHandle].up);
                starpu_data_acquire(cellHandles[idxLevel][idxHandle].down, STARPU_R);
                starpu_data_release(cellHandles[idxLevel][idxHandle].down);
            }
        }
        {
            for(int idxHandle = 0 ; idxHandle < int(particleHandles.size()) ; ++idxHandle){
                starpu_data_acquire(particleHandles[idxHandle].symb, STARPU_R);
                starpu_data_release(particleHandles[idxHandle].symb);
                starpu_data_acquire(particleHandles[idxHandle].down, STARPU_R);
                starpu_data_release(particleHandles[idxHandle].down);
            }
        }
    }

BRAMAS Berenger's avatar
BRAMAS Berenger committed
291
    /**
      * Resumes the workers, cleans the data handles, lets every worker
      * release its kernel and finally shuts StarPU down.
      */
    ~FGroupTaskStarPUAlgorithm(){
        // The workers were left paused, they are needed to release the kernels
        starpu_resume();

        cleanHandle();
        delete[] cellHandles;

        // The kernels are released by the workers themselves, one at a time
        starpu_pthread_mutex_t releaseMutex;
        starpu_pthread_mutex_init(&releaseMutex, nullptr);
#ifdef STARPU_USE_CPU
        FStarPUUtils::ExecOnWorkers(STARPU_CPU, [&](){
            starpu_pthread_mutex_lock(&releaseMutex);
            cpuWrapper.releaseKernel(starpu_worker_get_id());
            starpu_pthread_mutex_unlock(&releaseMutex);
        });
        // NOTE(review): same registration as in the constructor, looks redundant here - confirm
        wrappers.set(FSTARPU_CPU_IDX, &cpuWrapper);
#endif
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
        FStarPUUtils::ExecOnWorkers(STARPU_CUDA, [&](){
            starpu_pthread_mutex_lock(&releaseMutex);
            cudaWrapper.releaseKernel(starpu_worker_get_id());
            starpu_pthread_mutex_unlock(&releaseMutex);
        });
        wrappers.set(FSTARPU_CUDA_IDX, &cudaWrapper);
#endif
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
        FStarPUUtils::ExecOnWorkers(STARPU_OPENCL, [&](){
            starpu_pthread_mutex_lock(&releaseMutex);
            openclWrapper.releaseKernel(starpu_worker_get_id());
            starpu_pthread_mutex_unlock(&releaseMutex);
        });
        wrappers.set(FSTARPU_OPENCL_IDX, &openclWrapper);
#endif
        starpu_pthread_mutex_destroy(&releaseMutex);

#ifdef STARPU_SUPPORT_ARBITER
        starpu_arbiter_destroy(arbiterGlobal);
#endif

        starpu_shutdown();
    }

BRAMAS Berenger's avatar
BRAMAS Berenger committed
333
    void rebuildInteractions(){
334
        FAssertLF(getenv("OMP_WAIT_POLICY") == nullptr
BRAMAS Berenger's avatar
BRAMAS Berenger committed
335 336
                || strcmp(getenv("OMP_WAIT_POLICY"), "PASSIVE") == 0
                || strcmp(getenv("OMP_WAIT_POLICY"), "passive") == 0);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
337

338 339
        #pragma omp parallel
        #pragma omp single
BRAMAS Berenger's avatar
BRAMAS Berenger committed
340 341 342 343 344
        buildExternalInteractionVecs();

        buildHandles();
    }

BRAMAS Berenger's avatar
BRAMAS Berenger committed
345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389

#ifdef STARPU_USE_CPU
    /**
      * Hands func to FStarPUUtils::ExecOnWorkers for the CPU workers.
      * The StarPU workers are resumed for the call and paused again after it.
      */
    void forEachCpuWorker(std::function<void(void)> func){
        starpu_resume();
        FStarPUUtils::ExecOnWorkers(STARPU_CPU, func);
        starpu_pause();
    }

    /**
      * Same as above, but func receives the kernel that cpuWrapper holds
      * for the worker executing it.
      */
    void forEachCpuWorker(std::function<void(KernelClass*)> func){
        const auto callWithWorkerKernel = [&](){
            func(cpuWrapper.getKernel(starpu_worker_get_id()));
        };

        starpu_resume();
        FStarPUUtils::ExecOnWorkers(STARPU_CPU, callWithWorkerKernel);
        starpu_pause();
    }
#endif
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
    /**
      * Hands func to FStarPUUtils::ExecOnWorkers for the CUDA workers.
      * The StarPU workers are resumed for the call and paused again after it.
      */
    void forEachCudaWorker(std::function<void(void)> func){
        starpu_resume();
        FStarPUUtils::ExecOnWorkers(STARPU_CUDA, func);
        starpu_pause();
    }
    /**
      * Same as above, but func receives (as void*) the kernel that
      * cudaWrapper holds for the worker executing it.
      */
    void forEachCudaWorker(std::function<void(void*)> func){
        const auto callWithWorkerKernel = [&](){
            func(cudaWrapper.getKernel(starpu_worker_get_id()));
        };

        starpu_resume();
        FStarPUUtils::ExecOnWorkers(STARPU_CUDA, callWithWorkerKernel);
        starpu_pause();
    }
#endif
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
    /**
      * Hands func to FStarPUUtils::ExecOnWorkers for the OpenCL workers.
      * The StarPU workers are resumed for the call and paused again after it.
      */
    void forEachOpenCLWorker(std::function<void(void)> func){
        starpu_resume();
        FStarPUUtils::ExecOnWorkers(STARPU_OPENCL, func);
        starpu_pause();
    }
    /**
      * Same as above, but func receives (as void*) the kernel that
      * openclWrapper holds for the worker executing it.
      */
    void forEachOpenCLWorker(std::function<void(void*)> func){
        const auto callWithWorkerKernel = [&](){
            func(openclWrapper.getKernel(starpu_worker_get_id()));
        };

        starpu_resume();
        FStarPUUtils::ExecOnWorkers(STARPU_OPENCL, callWithWorkerKernel);
        starpu_pause();
    }
#endif

390 391 392 393 394
protected:
    /**
      * Runs the complete algorithm.
      */
    void executeCore(const unsigned operationsToProceed) override {
BRAMAS Berenger's avatar
BRAMAS Berenger committed
395
        FLOG( FLog::Controller << "\tStart FGroupTaskStarPUAlgorithm\n" );
396
        const bool directOnly = (tree->getHeight() <= 2);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
397

398 399 400 401
#ifdef STARPU_USE_CPU
        FTIME_TASKS(cpuWrapper.taskTimeRecorder.start());
#endif

402
        starpu_resume();
403
        FLOG( FTic timerSoumission; );
BRAMAS Berenger's avatar
BRAMAS Berenger committed
404

405 406
        if( operationsToProceed & FFmmP2P ) directPass();

407
        if(operationsToProceed & FFmmP2M && !directOnly) bottomPass();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
408

409
        if(operationsToProceed & FFmmM2M && !directOnly) upwardPass();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
410

BRAMAS Berenger's avatar
BRAMAS Berenger committed
411
        if(operationsToProceed & FFmmM2L && !directOnly) transferPass(FAbstractAlgorithm::upperWorkingLevel, FAbstractAlgorithm::lowerWorkingLevel-1 , true, true);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
412

413
        if(operationsToProceed & FFmmL2L && !directOnly) downardPass();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
414

BRAMAS Berenger's avatar
BRAMAS Berenger committed
415 416
        if(operationsToProceed & FFmmM2L && !directOnly) transferPass(FAbstractAlgorithm::lowerWorkingLevel-1, FAbstractAlgorithm::lowerWorkingLevel, true, true);

417
        if( operationsToProceed & FFmmL2P && !directOnly) mergePass();
418 419 420
#ifdef STARPU_USE_REDUX
        if( operationsToProceed & FFmmL2P && !directOnly) readParticle();
#endif
BRAMAS Berenger's avatar
BRAMAS Berenger committed
421

422
        FLOG( FLog::Controller << "\t\t Submitting the tasks took " << timerSoumission.tacAndElapsed() << "s\n" );
BRAMAS Berenger's avatar
BRAMAS Berenger committed
423
        starpu_task_wait_for_all();
424 425 426 427 428

        FLOG( FTic timerSync; );
        syncData();
        FLOG( FLog::Controller << "\t\t Moving data to the host took " << timerSync.tacAndElapsed() << "s\n" );

429
        starpu_pause();
430 431 432 433 434

#ifdef STARPU_USE_CPU
        FTIME_TASKS(cpuWrapper.taskTimeRecorder.end());
        FTIME_TASKS(cpuWrapper.taskTimeRecorder.saveToDisk("/tmp/taskstime-FGroupTaskStarPUAlgorithm.txt"));
#endif
BRAMAS Berenger's avatar
BRAMAS Berenger committed
435 436
    }

437

BRAMAS Berenger's avatar
BRAMAS Berenger committed
438 439
    void initCodelet(){
        memset(&p2m_cl, 0, sizeof(p2m_cl));
440
#ifdef STARPU_USE_CPU
441
        if(originalCpuKernel->supportP2M(FSTARPU_CPU_IDX)){
442 443 444
            p2m_cl.cpu_funcs[0] = StarPUCpuWrapperClass::bottomPassCallback;
            p2m_cl.where |= STARPU_CPU;
        }
445
#endif
446
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
447
        if(originalCpuKernel->supportP2M(FSTARPU_CUDA_IDX)){
448 449 450 451
            p2m_cl.cuda_funcs[0] = StarPUCudaWrapperClass::bottomPassCallback;
            p2m_cl.where |= STARPU_CUDA;
        }
#endif
452
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
453
        if(originalCpuKernel->supportP2M(FSTARPU_OPENCL_IDX)){
454 455 456
            p2m_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::bottomPassCallback;
            p2m_cl.where |= STARPU_OPENCL;
        }
457
#endif
458 459 460 461
        p2m_cl.nbuffers = 3;
        p2m_cl.modes[0] = STARPU_R;
        p2m_cl.modes[1] = STARPU_RW;
        p2m_cl.modes[2] = STARPU_R;
462
        p2m_cl.name = "p2m_cl";
BRAMAS Berenger's avatar
BRAMAS Berenger committed
463

BRAMAS Berenger's avatar
BRAMAS Berenger committed
464
        memset(&m2m_cl, 0, sizeof(m2m_cl));
465
#ifdef STARPU_USE_CPU
BRAMAS Berenger's avatar
BRAMAS Berenger committed
466 467 468 469
        if(originalCpuKernel->supportM2M(FSTARPU_CPU_IDX)){
            m2m_cl.cpu_funcs[0] = StarPUCpuWrapperClass::upwardPassCallback;
            m2m_cl.where |= STARPU_CPU;
        }
470
#endif
471
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
BRAMAS Berenger's avatar
BRAMAS Berenger committed
472 473 474 475
        if(originalCpuKernel->supportM2M(FSTARPU_CUDA_IDX)){
            m2m_cl.cuda_funcs[0] = StarPUCudaWrapperClass::upwardPassCallback;
            m2m_cl.where |= STARPU_CUDA;
        }
476
#endif
477
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
BRAMAS Berenger's avatar
BRAMAS Berenger committed
478 479 480 481
        if(originalCpuKernel->supportM2M(FSTARPU_OPENCL_IDX)){
            m2m_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::upwardPassCallback;
            m2m_cl.where |= STARPU_OPENCL;
        }
482
#endif
BRAMAS Berenger's avatar
BRAMAS Berenger committed
483 484 485 486 487 488 489
        m2m_cl.nbuffers = 4;
        m2m_cl.dyn_modes = (starpu_data_access_mode*)malloc(m2m_cl.nbuffers*sizeof(starpu_data_access_mode));
        m2m_cl.dyn_modes[0] = STARPU_R;
        m2m_cl.dyn_modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
        m2m_cl.name = "m2m_cl";
        m2m_cl.dyn_modes[2] = STARPU_R;
        m2m_cl.dyn_modes[3] = STARPU_R;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
490

BRAMAS Berenger's avatar
BRAMAS Berenger committed
491
        memset(&l2l_cl, 0, sizeof(l2l_cl));
492
#ifdef STARPU_USE_CPU
BRAMAS Berenger's avatar
BRAMAS Berenger committed
493 494 495 496
        if(originalCpuKernel->supportL2L(FSTARPU_CPU_IDX)){
            l2l_cl.cpu_funcs[0] = StarPUCpuWrapperClass::downardPassCallback;
            l2l_cl.where |= STARPU_CPU;
        }
497
#endif
498
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
BRAMAS Berenger's avatar
BRAMAS Berenger committed
499 500 501 502
        if(originalCpuKernel->supportL2L(FSTARPU_CUDA_IDX)){
            l2l_cl.cuda_funcs[0] = StarPUCudaWrapperClass::downardPassCallback;
            l2l_cl.where |= STARPU_CUDA;
        }
503
#endif
504
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
BRAMAS Berenger's avatar
BRAMAS Berenger committed
505 506 507
        if(originalCpuKernel->supportL2L(FSTARPU_OPENCL_IDX)){
            l2l_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::downardPassCallback;
            l2l_cl.where |= STARPU_OPENCL;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
508
        }
BRAMAS Berenger's avatar
BRAMAS Berenger committed
509 510 511 512 513 514 515 516
#endif
        l2l_cl.nbuffers = 4;
        l2l_cl.dyn_modes = (starpu_data_access_mode*)malloc(l2l_cl.nbuffers*sizeof(starpu_data_access_mode));
        l2l_cl.dyn_modes[0] = STARPU_R;
        l2l_cl.dyn_modes[1] = STARPU_R;
        l2l_cl.name = "l2l_cl";
        l2l_cl.dyn_modes[2] = STARPU_R;
        l2l_cl.dyn_modes[3] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
517

BRAMAS Berenger's avatar
BRAMAS Berenger committed
518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544
        memset(&l2l_cl_nocommute, 0, sizeof(l2l_cl_nocommute));
#ifdef STARPU_USE_CPU
        if(originalCpuKernel->supportL2L(FSTARPU_CPU_IDX)){
            l2l_cl_nocommute.cpu_funcs[0] = StarPUCpuWrapperClass::downardPassCallback;
            l2l_cl_nocommute.where |= STARPU_CPU;
        }
#endif
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
        if(originalCpuKernel->supportL2L(FSTARPU_CUDA_IDX)){
            l2l_cl_nocommute.cuda_funcs[0] = StarPUCudaWrapperClass::downardPassCallback;
            l2l_cl_nocommute.where |= STARPU_CUDA;
        }
#endif
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
        if(originalCpuKernel->supportL2L(FSTARPU_OPENCL_IDX)){
            l2l_cl_nocommute.opencl_funcs[0] = StarPUOpenClWrapperClass::downardPassCallback;
            l2l_cl_nocommute.where |= STARPU_OPENCL;
        }
#endif
        l2l_cl_nocommute.nbuffers = 4;
        l2l_cl_nocommute.dyn_modes = (starpu_data_access_mode*)malloc(l2l_cl_nocommute.nbuffers*sizeof(starpu_data_access_mode));
        l2l_cl_nocommute.dyn_modes[0] = STARPU_R;
        l2l_cl_nocommute.dyn_modes[1] = STARPU_R;
        l2l_cl_nocommute.name = "l2l_cl";
        l2l_cl_nocommute.dyn_modes[2] = STARPU_R;
        l2l_cl_nocommute.dyn_modes[3] = STARPU_RW;

BRAMAS Berenger's avatar
BRAMAS Berenger committed
545
        memset(&l2p_cl, 0, sizeof(l2p_cl));
546
#ifdef STARPU_USE_CPU
547
        if(originalCpuKernel->supportL2P(FSTARPU_CPU_IDX)){
548 549 550
            l2p_cl.cpu_funcs[0] = StarPUCpuWrapperClass::mergePassCallback;
            l2p_cl.where |= STARPU_CPU;
        }
551
#endif
552
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
553
        if(originalCpuKernel->supportL2P(FSTARPU_CUDA_IDX)){
554 555 556 557
            l2p_cl.cuda_funcs[0] = StarPUCudaWrapperClass::mergePassCallback;
            l2p_cl.where |= STARPU_CUDA;
        }
#endif
558
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
559
        if(originalCpuKernel->supportL2P(FSTARPU_OPENCL_IDX)){
560 561 562
            l2p_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::mergePassCallback;
            l2p_cl.where |= STARPU_OPENCL;
        }
563
#endif
564
        l2p_cl.nbuffers = 4;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
565
        l2p_cl.modes[0] = STARPU_R;
566 567
        l2p_cl.modes[1] = STARPU_R;
        l2p_cl.modes[2] = STARPU_R;
568 569 570
#ifdef STARPU_USE_REDUX
        l2p_cl.modes[3] = STARPU_REDUX;
#else
571
        l2p_cl.modes[3] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
572
#endif
573
        l2p_cl.name = "l2p_cl";
BRAMAS Berenger's avatar
BRAMAS Berenger committed
574 575

        memset(&p2p_cl_in, 0, sizeof(p2p_cl_in));
576
#ifdef STARPU_USE_CPU
577
        if(originalCpuKernel->supportP2P(FSTARPU_CPU_IDX)){
578 579 580
            p2p_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::directInPassCallback;
            p2p_cl_in.where |= STARPU_CPU;
        }
581
#endif
582
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
583
        if(originalCpuKernel->supportP2P(FSTARPU_CUDA_IDX)){
584 585 586 587
            p2p_cl_in.cuda_funcs[0] = StarPUCudaWrapperClass::directInPassCallback;
            p2p_cl_in.where |= STARPU_CUDA;
        }
#endif
588
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
589
        if(originalCpuKernel->supportP2P(FSTARPU_OPENCL_IDX)){
590 591 592
            p2p_cl_in.opencl_funcs[0] = StarPUOpenClWrapperClass::directInPassCallback;
            p2p_cl_in.where |= STARPU_OPENCL;
        }
593
#endif
594 595
        p2p_cl_in.nbuffers = 2;
        p2p_cl_in.modes[0] = STARPU_R;
596 597 598
#ifdef STARPU_USE_REDUX
        p2p_cl_in.modes[1] = STARPU_REDUX;
#else
599
        p2p_cl_in.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
600
#endif
601
        p2p_cl_in.name = "p2p_cl_in";
BRAMAS Berenger's avatar
BRAMAS Berenger committed
602
        memset(&p2p_cl_inout, 0, sizeof(p2p_cl_inout));
603
#ifdef STARPU_USE_CPU
604
        if(originalCpuKernel->supportP2PExtern(FSTARPU_CPU_IDX)){
605 606 607
            p2p_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::directInoutPassCallback;
            p2p_cl_inout.where |= STARPU_CPU;
        }
608
#endif
609
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
610
        if(originalCpuKernel->supportP2PExtern(FSTARPU_CUDA_IDX)){
611 612 613 614
            p2p_cl_inout.cuda_funcs[0] = StarPUCudaWrapperClass::directInoutPassCallback;
            p2p_cl_inout.where |= STARPU_CUDA;
        }
#endif
615
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
616
        if(originalCpuKernel->supportP2PExtern(FSTARPU_OPENCL_IDX)){
617 618 619
            p2p_cl_inout.opencl_funcs[0] = StarPUOpenClWrapperClass::directInoutPassCallback;
            p2p_cl_inout.where |= STARPU_OPENCL;
        }
620
#endif
621 622
        p2p_cl_inout.nbuffers = 4;
        p2p_cl_inout.modes[0] = STARPU_R;
623 624 625
#ifdef STARPU_USE_REDUX
        p2p_cl_inout.modes[1] = STARPU_REDUX;
#else
626
        p2p_cl_inout.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
627
#endif
628
        p2p_cl_inout.modes[2] = STARPU_R;
629 630 631
#ifdef STARPU_USE_REDUX
        p2p_cl_inout.modes[3] = STARPU_REDUX;
#else
632
        p2p_cl_inout.modes[3] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
633
#endif
634
        p2p_cl_inout.name = "p2p_cl_inout";
BRAMAS Berenger's avatar
BRAMAS Berenger committed
635 636

        memset(&m2l_cl_in, 0, sizeof(m2l_cl_in));
637
#ifdef STARPU_USE_CPU
638
        if(originalCpuKernel->supportM2L(FSTARPU_CPU_IDX)){
639 640 641
            m2l_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::transferInPassCallback;
            m2l_cl_in.where |= STARPU_CPU;
        }
642
#endif
643
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
644
        if(originalCpuKernel->supportM2L(FSTARPU_CUDA_IDX)){
645 646 647 648
            m2l_cl_in.cuda_funcs[0] = StarPUCudaWrapperClass::transferInPassCallback;
            m2l_cl_in.where |= STARPU_CUDA;
        }
#endif
649
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
650
        if(originalCpuKernel->supportM2L(FSTARPU_OPENCL_IDX)){
651 652 653
            m2l_cl_in.opencl_funcs[0] = StarPUOpenClWrapperClass::transferInPassCallback;
            m2l_cl_in.where |= STARPU_OPENCL;
        }
654
#endif
655 656
        m2l_cl_in.nbuffers = 3;
        m2l_cl_in.modes[0] = STARPU_R;
657
        m2l_cl_in.modes[1] = STARPU_R;
658
        m2l_cl_in.modes[2] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
659
        m2l_cl_in.name = "m2l_cl_in";
BRAMAS Berenger's avatar
BRAMAS Berenger committed
660

BRAMAS Berenger's avatar
BRAMAS Berenger committed
661
        memset(&m2l_cl_inout, 0, sizeof(m2l_cl_inout));
662
#ifdef STARPU_USE_CPU
663
        if(originalCpuKernel->supportM2LExtern(FSTARPU_CPU_IDX)){
664 665 666
            m2l_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::transferInoutPassCallback;
            m2l_cl_inout.where |= STARPU_CPU;
        }
667
#endif
668
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
669
        if(originalCpuKernel->supportM2LExtern(FSTARPU_CUDA_IDX)){
670 671 672 673
            m2l_cl_inout.cuda_funcs[0] = StarPUCudaWrapperClass::transferInoutPassCallback;
            m2l_cl_inout.where |= STARPU_CUDA;
        }
#endif
674
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
675
        if(originalCpuKernel->supportM2LExtern(FSTARPU_OPENCL_IDX)){
676 677 678
            m2l_cl_inout.opencl_funcs[0] = StarPUOpenClWrapperClass::transferInoutPassCallback;
            m2l_cl_inout.where |= STARPU_OPENCL;
        }
679
#endif
BRAMAS Berenger's avatar
BRAMAS Berenger committed
680
        m2l_cl_inout.nbuffers = 4;
681
        m2l_cl_inout.modes[0] = STARPU_R;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
682 683
        m2l_cl_inout.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
        m2l_cl_inout.modes[2] = STARPU_R;
684
        m2l_cl_inout.modes[3] = STARPU_R;
685
        m2l_cl_inout.name = "m2l_cl_inout";
686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729

#ifdef STARPU_USE_REDUX
        memset(&p2p_redux_init, 0, sizeof(p2p_redux_init));
#ifdef STARPU_USE_CPU
        p2p_redux_init.cpu_funcs[0] = FStarPUReduxCpu::InitData<typename ParticleGroupClass::ParticleDataType>;
        p2p_redux_init.where |= STARPU_CPU;
#endif
        p2p_redux_init.nbuffers = 1;
        p2p_redux_init.modes[0] = STARPU_RW;
        p2p_redux_init.name = "p2p_redux_init";

        memset(&p2p_redux_perform, 0, sizeof(p2p_redux_perform));
#ifdef STARPU_USE_CPU
        p2p_redux_perform.cpu_funcs[0] = FStarPUReduxCpu::ReduceData<typename ParticleGroupClass::ParticleDataType>;
        p2p_redux_perform.where |= STARPU_CPU;
#endif
        p2p_redux_perform.nbuffers = 2;
        p2p_redux_perform.modes[0] = STARPU_RW;
        p2p_redux_perform.modes[1] = STARPU_R;
        p2p_redux_perform.name = "p2p_redux_perform";

        memset(&p2p_redux_read, 0, sizeof(p2p_redux_read));
#ifdef STARPU_USE_CPU
        if(originalCpuKernel->supportL2P(FSTARPU_CPU_IDX)){
            p2p_redux_read.cpu_funcs[0] = FStarPUReduxCpu::EmptyCodelet<typename ParticleGroupClass::ParticleDataType>;
            p2p_redux_read.where |= STARPU_CPU;
        }
#endif
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
        if(originalCpuKernel->supportL2P(FSTARPU_CUDA_IDX)){
            p2p_redux_read.cuda_funcs[0] = FStarPUReduxCpu::EmptyCodelet<typename ParticleGroupClass::ParticleDataType>;
            p2p_redux_read.where |= STARPU_CUDA;
        }
#endif
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
        if(originalCpuKernel->supportL2P(FSTARPU_OPENCL_IDX)){
            p2p_redux_read.opencl_funcs[0] = FStarPUReduxCpu::EmptyCodelet<typename ParticleGroupClass::ParticleDataType>;
            p2p_redux_read.where |= STARPU_OPENCL;
        }
#endif
        p2p_redux_read.nbuffers = 1;
        p2p_redux_read.modes[0] = STARPU_R;
        p2p_redux_read.name = "p2p_redux_read";
#endif
BRAMAS Berenger's avatar
BRAMAS Berenger committed
730 731 732 733 734
    }

    /** dealloc in a starpu way all the defined handles */
    void cleanHandle(){
        for(int idxLevel = 0 ; idxLevel < tree->getHeight() ; ++idxLevel){
735 736 737 738
            for(int idxHandle = 0 ; idxHandle < int(cellHandles[idxLevel].size()) ; ++idxHandle){
                starpu_data_unregister(cellHandles[idxLevel][idxHandle].symb);
                starpu_data_unregister(cellHandles[idxLevel][idxHandle].up);
                starpu_data_unregister(cellHandles[idxLevel][idxHandle].down);
739
            }
740
            cellHandles[idxLevel].clear();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
741 742
        }
        {
743 744 745
            for(int idxHandle = 0 ; idxHandle < int(particleHandles.size()) ; ++idxHandle){
                starpu_data_unregister(particleHandles[idxHandle].symb);
                starpu_data_unregister(particleHandles[idxHandle].down);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
746
            }
747
            particleHandles.clear();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
748 749 750 751 752 753 754 755 756 757
        }
    }

    /** Reset the handles array and create new ones to define
     * in a starpu way each block of data
     */
    void buildHandles(){
        cleanHandle();

        for(int idxLevel = 2 ; idxLevel < tree->getHeight() ; ++idxLevel){
758
            cellHandles[idxLevel].resize(tree->getNbCellGroupAtLevel(idxLevel));
BRAMAS Berenger's avatar
BRAMAS Berenger committed
759 760 761

            for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
                const CellContainerClass* currentCells = tree->getCellGroup(idxLevel, idxGroup);
762
                starpu_variable_data_register(&cellHandles[idxLevel][idxGroup].symb, 0,
763
                                              (uintptr_t)currentCells->getRawBuffer(), currentCells->getBufferSizeInByte());
764 765 766 767
                starpu_variable_data_register(&cellHandles[idxLevel][idxGroup].up, 0,
                                              (uintptr_t)currentCells->getRawMultipoleBuffer(), currentCells->getMultipoleBufferSizeInByte());
                starpu_variable_data_register(&cellHandles[idxLevel][idxGroup].down, 0,
                                              (uintptr_t)currentCells->getRawLocalBuffer(), currentCells->getLocalBufferSizeInByte());
768
                cellHandles[idxLevel][idxGroup].intervalSize = int(currentCells->getNumberOfCellsInBlock());
769
#ifdef STARPU_SUPPORT_ARBITER
BRAMAS Berenger's avatar
BRAMAS Berenger committed
770 771
                starpu_data_assign_arbiter(cellHandles[idxLevel][idxGroup].up, arbiterGlobal);
                starpu_data_assign_arbiter(cellHandles[idxLevel][idxGroup].down, arbiterGlobal);
772
#endif
BRAMAS Berenger's avatar
BRAMAS Berenger committed
773 774 775
            }
        }
        {
776
            particleHandles.resize(tree->getNbParticleGroup());
BRAMAS Berenger's avatar
BRAMAS Berenger committed
777 778
            for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
                ParticleGroupClass* containers = tree->getParticleGroup(idxGroup);
779
                starpu_variable_data_register(&particleHandles[idxGroup].symb, 0,
780
                                              (uintptr_t)containers->getRawBuffer(), containers->getBufferSizeInByte());
781 782
                starpu_variable_data_register(&particleHandles[idxGroup].down, 0,
                                              (uintptr_t)containers->getRawAttributesBuffer(), containers->getAttributesBufferSizeInByte());
783 784 785 786
#ifdef STARPU_USE_REDUX
                 starpu_data_set_reduction_methods(particleHandles[idxGroup].down, &p2p_redux_perform,
                                                   &p2p_redux_init);
#else
787
#ifdef STARPU_SUPPORT_ARBITER
BRAMAS Berenger's avatar
BRAMAS Berenger committed
788
                starpu_data_assign_arbiter(particleHandles[idxGroup].down, arbiterGlobal);
789 790 791
#endif // STARPU_SUPPORT_ARBITER
#endif // STARPU_USE_REDUX
                particleHandles[idxGroup].intervalSize = int(containers->getNumberOfLeavesInBlock());
BRAMAS Berenger's avatar
BRAMAS Berenger committed
792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820
            }
        }
    }

    /**
     * Build the vectors of interactions between blocks.
     * It fills externalInteractionsAllLevel and externalInteractionsLeafLevel.
     * Warning: for now, the OpenMP tasks use the class attributes directly!
     */
    void buildExternalInteractionVecs(){
        FLOG( FTic timer; FTic leafTimer; FTic cellTimer; );
        // Reset interactions
        externalInteractionsAllLevel.clear();
        externalInteractionsLeafLevel.clear();
        // One per level + leaf level
        externalInteractionsAllLevel.resize(tree->getHeight());

        // First leaf level
        {
            // We create one big vector per block
            externalInteractionsLeafLevel.resize(tree->getNbParticleGroup());

            for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
                // Create the vector
                ParticleGroupClass* containers = tree->getParticleGroup(idxGroup);

                std::vector<BlockInteractions<ParticleGroupClass>>* externalInteractions = &externalInteractionsLeafLevel[idxGroup];

821
                #pragma omp task default(none) firstprivate(idxGroup, containers, externalInteractions)
BRAMAS Berenger's avatar
BRAMAS Berenger committed
822 823 824 825 826
                { // Can be a task(inout:iterCells)
                    std::vector<OutOfBlockInteraction> outsideInteractions;
                    const MortonIndex blockStartIdx = containers->getStartingIndex();
                    const MortonIndex blockEndIdx   = containers->getEndingIndex();

827 828 829
                    for(int leafIdx = 0 ; leafIdx < containers->getNumberOfLeavesInBlock() ; ++leafIdx){
                        const MortonIndex mindex = containers->getLeafMortonIndex(leafIdx);
                        // ParticleContainerClass particles = containers->template getLeaf<ParticleContainerClass>(leafIdx);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
830

831 832 833 834 835 836 837 838 839 840 841 842 843
                        MortonIndex interactionsIndexes[26];
                        int interactionsPosition[26];
                        FTreeCoordinate coord(mindex, tree->getHeight()-1);
                        int counter = coord.getNeighborsIndexes(tree->getHeight(),interactionsIndexes,interactionsPosition);

                        for(int idxInter = 0 ; idxInter < counter ; ++idxInter){
                            if( blockStartIdx <= interactionsIndexes[idxInter] && interactionsIndexes[idxInter] < blockEndIdx ){
                                // Inside block interaction, do nothing
                            }
                            else if(interactionsIndexes[idxInter] < mindex){
                                OutOfBlockInteraction property;
                                property.insideIndex = mindex;
                                property.outIndex    = interactionsIndexes[idxInter];
BRAMAS Berenger's avatar
BRAMAS Berenger committed
844
                                property.relativeOutPosition = interactionsPosition[idxInter];
845
                                property.insideIdxInBlock = leafIdx;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
846
                                property.outsideIdxInBlock = -1;
847
                                outsideInteractions.push_back(property);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
848 849 850 851 852 853 854 855 856 857
                            }
                        }
                    }

                    // Sort to match external order
                    FQuickSort<OutOfBlockInteraction, int>::QsSequential(outsideInteractions.data(),int(outsideInteractions.size()));

                    int currentOutInteraction = 0;
                    for(int idxLeftGroup = 0 ; idxLeftGroup < idxGroup && currentOutInteraction < int(outsideInteractions.size()) ; ++idxLeftGroup){
                        ParticleGroupClass* leftContainers = tree->getParticleGroup(idxLeftGroup);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
858 859
                        const MortonIndex blockStartIdxOther    = leftContainers->getStartingIndex();
                        const MortonIndex blockEndIdxOther      = leftContainers->getEndingIndex();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
860

BRAMAS Berenger's avatar
BRAMAS Berenger committed
861 862 863 864
                        while(currentOutInteraction < int(outsideInteractions.size())
                              && (outsideInteractions[currentOutInteraction].outIndex < blockStartIdxOther
                                  || leftContainers->getLeafIndex(outsideInteractions[currentOutInteraction].outIndex) == -1)
                              && outsideInteractions[currentOutInteraction].outIndex < blockEndIdxOther){
BRAMAS Berenger's avatar
BRAMAS Berenger committed
865 866 867 868
                            currentOutInteraction += 1;
                        }

                        int lastOutInteraction = currentOutInteraction;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
869
                        int copyExistingInteraction = currentOutInteraction;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
870
                        while(lastOutInteraction < int(outsideInteractions.size()) && outsideInteractions[lastOutInteraction].outIndex < blockEndIdxOther){
BRAMAS Berenger's avatar
BRAMAS Berenger committed
871 872 873 874 875 876 877 878
                            const int leafPos = leftContainers->getLeafIndex(outsideInteractions[lastOutInteraction].outIndex);
                            if(leafPos != -1){
                                if(copyExistingInteraction != lastOutInteraction){
                                    outsideInteractions[copyExistingInteraction] = outsideInteractions[lastOutInteraction];
                                }
                                outsideInteractions[copyExistingInteraction].outsideIdxInBlock = leafPos;
                                copyExistingInteraction += 1;
                            }
BRAMAS Berenger's avatar
BRAMAS Berenger committed
879 880 881
                            lastOutInteraction += 1;
                        }

BRAMAS Berenger's avatar
BRAMAS Berenger committed
882
                        const int nbInteractionsBetweenBlocks = (copyExistingInteraction-currentOutInteraction);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
883 884 885 886 887 888 889
                        if(nbInteractionsBetweenBlocks){
                            externalInteractions->emplace_back();
                            BlockInteractions<ParticleGroupClass>* interactions = &externalInteractions->back();
                            interactions->otherBlock = leftContainers;
                            interactions->otherBlockId = idxLeftGroup;
                            interactions->interactions.resize(nbInteractionsBetweenBlocks);
                            std::copy(outsideInteractions.begin() + currentOutInteraction,
BRAMAS Berenger's avatar
BRAMAS Berenger committed
890
                                      outsideInteractions.begin() + copyExistingInteraction,
BRAMAS Berenger's avatar
BRAMAS Berenger committed
891 892 893 894 895 896 897 898 899 900 901 902 903 904 905
                                      interactions->interactions.begin());
                        }

                        currentOutInteraction = lastOutInteraction;
                    }
                }
            }
        }
        FLOG( leafTimer.tac(); );
        FLOG( cellTimer.tic(); );
        {
            for(int idxLevel = tree->getHeight()-1 ; idxLevel >= 2 ; --idxLevel){
                externalInteractionsAllLevel[idxLevel].resize(tree->getNbCellGroupAtLevel(idxLevel));

                for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
906
                    CellContainerClass* currentCells = tree->getCellGroup(idxLevel, idxGroup);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
907 908 909

                    std::vector<BlockInteractions<CellContainerClass>>* externalInteractions = &externalInteractionsAllLevel[idxLevel][idxGroup];

910
                    #pragma omp task default(none) firstprivate(idxGroup, currentCells, idxLevel, externalInteractions)
BRAMAS Berenger's avatar
BRAMAS Berenger committed
911 912 913 914 915
                    {
                        std::vector<OutOfBlockInteraction> outsideInteractions;
                        const MortonIndex blockStartIdx = currentCells->getStartingIndex();
                        const MortonIndex blockEndIdx   = currentCells->getEndingIndex();

916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931
                        for(int cellIdx = 0 ; cellIdx < currentCells->getNumberOfCellsInBlock() ; ++cellIdx){
                            const MortonIndex mindex = currentCells->getCellMortonIndex(cellIdx);

                            MortonIndex interactionsIndexes[189];
                            int interactionsPosition[189];
                            const FTreeCoordinate coord(mindex, idxLevel);
                            int counter = coord.getInteractionNeighbors(idxLevel,interactionsIndexes,interactionsPosition);

                            for(int idxInter = 0 ; idxInter < counter ; ++idxInter){
                                if( blockStartIdx <= interactionsIndexes[idxInter] && interactionsIndexes[idxInter] < blockEndIdx ){
                                    // Nothing to do
                                }
                                else if(interactionsIndexes[idxInter] < mindex){
                                    OutOfBlockInteraction property;
                                    property.insideIndex = mindex;
                                    property.outIndex    = interactionsIndexes[idxInter];
BRAMAS Berenger's avatar
BRAMAS Berenger committed
932
                                    property.relativeOutPosition = interactionsPosition[idxInter];
933
                                    property.insideIdxInBlock = cellIdx;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
934
                                    property.outsideIdxInBlock = -1;
935
                                    outsideInteractions.push_back(property);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
936 937 938 939 940 941 942 943 944 945
                                }
                            }
                        }

                        // Manage outofblock interaction
                        FQuickSort<OutOfBlockInteraction, int>::QsSequential(outsideInteractions.data(),int(outsideInteractions.size()));

                        int currentOutInteraction = 0;
                        for(int idxLeftGroup = 0 ; idxLeftGroup < idxGroup && currentOutInteraction < int(outsideInteractions.size()) ; ++idxLeftGroup){
                            CellContainerClass* leftCells   = tree->getCellGroup(idxLevel,