FGroupTaskStarpuMpiAlgorithm.hpp 139 KB
Newer Older
1 2 3 4
// Keep in private GIT
#ifndef FGROUPTASKSTARPUMPIALGORITHM_HPP
#define FGROUPTASKSTARPUMPIALGORITHM_HPP

5 6 7 8 9 10 11 12 13
#include "../../Utils/FGlobal.hpp"
#include "../../Core/FCoreCommon.hpp"
#include "../../Utils/FQuickSort.hpp"
#include "../../Containers/FTreeCoordinate.hpp"
#include "../../Utils/FLog.hpp"
#include "../../Utils/FTic.hpp"
#include "../../Utils/FAssert.hpp"
#include "../../Utils/FAlignedMemory.hpp"
#include "../../Utils/FAssert.hpp"
14
#include "../../Utils/FEnv.hpp"
15 16

#include "../../Utils/FMpi.hpp"
17

18 19
#include "FOutOfBlockInteraction.hpp"

20 21 22 23 24 25
#include <vector>
#include <memory>

#include <omp.h>

#include <starpu.h>
26
#include <starpu_mpi.h>
27
#include "../StarPUUtils/FStarPUUtils.hpp"
28
#include "../StarPUUtils/FStarPUFmmPriorities.hpp"
29 30
#include "../StarPUUtils/FStarPUFmmPrioritiesV2.hpp"
#include "../StarPUUtils/FStarPUReduxCpu.hpp"
31

32
#ifdef STARPU_USE_CPU
33
#include "../StarPUUtils/FStarPUCpuWrapper.hpp"
34
#endif
35
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
36 37 38 39 40
#include "../StarPUUtils/FStarPUCudaWrapper.hpp"
#include "../Cuda/FCudaEmptyKernel.hpp"
#include "../Cuda/FCudaGroupAttachedLeaf.hpp"
#include "../Cuda/FCudaGroupOfParticles.hpp"
#include "../Cuda/FCudaGroupOfCells.hpp"
41
#include "../Cuda/FCudaEmptyCellSymb.hpp"
42
#endif
43
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
44 45
#include "../StarPUUtils/FStarPUOpenClWrapper.hpp"
#include "../OpenCl/FOpenCLDeviceWrapper.hpp"
46
#include "../OpenCl/FEmptyOpenCLCode.hpp"
47
#endif
48

BRAMAS Berenger's avatar
BRAMAS Berenger committed
49 50
#include "../StarPUUtils/FStarPUReduxCpu.hpp"

51 52
#define SCALFMM_SIMGRID_TASKNAMEPARAMS
#ifdef SCALFMM_SIMGRID_TASKNAMEPARAMS
Martin Khannouz's avatar
Martin Khannouz committed
53
#include "../StarPUUtils/FStarPUTaskNameParams.hpp"
54
#endif
55

56
template <class OctreeClass, class CellContainerClass, class KernelClass, class ParticleGroupClass, class StarPUCpuWrapperClass
BRAMAS Berenger's avatar
BRAMAS Berenger committed
57 58 59 60 61 62 63
          #ifdef SCALFMM_ENABLE_CUDA_KERNEL
          , class StarPUCudaWrapperClass = FStarPUCudaWrapper<KernelClass, FCudaEmptyCellSymb, int, int, FCudaGroupOfCells<FCudaEmptyCellSymb, int, int>,
                                                              FCudaGroupOfParticles<int, 0, 0, int>, FCudaGroupAttachedLeaf<int, 0, 0, int>, FCudaEmptyKernel<int> >
          #endif
          #ifdef SCALFMM_ENABLE_OPENCL_KERNEL
          , class StarPUOpenClWrapperClass = FStarPUOpenClWrapper<KernelClass, FOpenCLDeviceWrapper<KernelClass>>
          #endif
64
          >
65
class FGroupTaskStarPUMpiAlgorithm : public FAbstractAlgorithm {
66
protected:
67
    typedef FGroupTaskStarPUMpiAlgorithm<OctreeClass, CellContainerClass, KernelClass, ParticleGroupClass, StarPUCpuWrapperClass
68
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
BRAMAS Berenger's avatar
BRAMAS Berenger committed
69
    , StarPUCudaWrapperClass
70
#endif
71
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
72
    , StarPUOpenClWrapperClass
73 74
#endif
    > ThisClass;
75

76
    int getTag(const int inLevel, const MortonIndex mindex, const int mode) const{
77 78 79
        int shift = 0;
        int height = tree->getHeight();
        while(height) { shift += 1; height >>= 1; }
BRAMAS Berenger's avatar
BRAMAS Berenger committed
80
        return int((((mindex<<shift) + inLevel) << 5) + mode);
81 82
    }

83 84 85 86 87 88 89 90 91
    const FMpi::FComm& comm;

    template <class OtherBlockClass>
    struct BlockInteractions{
        OtherBlockClass* otherBlock;
        int otherBlockId;
        std::vector<OutOfBlockInteraction> interactions;
    };

92 93 94 95
    struct CellHandles{
        starpu_data_handle_t symb;
        starpu_data_handle_t up;
        starpu_data_handle_t down;
96
        int intervalSize;
97 98 99 100 101
    };

    struct ParticleHandles{
        starpu_data_handle_t symb;
        starpu_data_handle_t down;
102
        int intervalSize;
103 104
    };

105 106 107 108
    std::vector< std::vector< std::vector<BlockInteractions<CellContainerClass>>>> externalInteractionsAllLevel;
    std::vector< std::vector<BlockInteractions<ParticleGroupClass>>> externalInteractionsLeafLevel;

    OctreeClass*const tree;       //< The Tree
109
    KernelClass*const originalCpuKernel;
110

111 112
    std::vector<CellHandles>* cellHandles;
    std::vector<ParticleHandles> particleHandles;
113 114

    starpu_codelet p2m_cl;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
115 116
    starpu_codelet m2m_cl;
    starpu_codelet l2l_cl;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
117
    starpu_codelet l2l_cl_nocommute;
118 119 120 121
    starpu_codelet l2p_cl;

    starpu_codelet m2l_cl_in;
    starpu_codelet m2l_cl_inout;
122
    starpu_codelet m2l_cl_inout_mpi;
123 124 125

    starpu_codelet p2p_cl_in;
    starpu_codelet p2p_cl_inout;
126
    starpu_codelet p2p_cl_inout_mpi;
127

BRAMAS Berenger's avatar
BRAMAS Berenger committed
128 129 130 131 132 133 134

#ifdef STARPU_USE_REDUX
    starpu_codelet p2p_redux_init;
    starpu_codelet p2p_redux_perform;
    starpu_codelet p2p_redux_read;
#endif

135 136 137
    const bool noCommuteAtLastLevel;
    const bool noCommuteBetweenLevel;

138
#ifdef STARPU_USE_CPU
139
    StarPUCpuWrapperClass cpuWrapper;
140
#endif
141
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
142 143
    StarPUCudaWrapperClass cudaWrapper;
#endif
144
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
145 146
    StarPUOpenClWrapperClass openclWrapper;
#endif
147 148 149

    FStarPUPtrInterface wrappers;
    FStarPUPtrInterface* wrapperptr;
150

151
#ifdef STARPU_SUPPORT_ARBITER
BRAMAS Berenger's avatar
BRAMAS Berenger committed
152
    starpu_arbiter_t arbiterGlobal;
153
#endif
BRAMAS Berenger's avatar
BRAMAS Berenger committed
154 155

#ifdef STARPU_USE_TASK_NAME
156 157 158 159 160 161 162 163 164 165
#ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
    std::vector<std::unique_ptr<char[]>> m2mTaskNames;
    std::vector<std::unique_ptr<char[]>> m2lTaskNames;
    std::vector<std::unique_ptr<char[]>> m2lOuterTaskNames;
    std::vector<std::unique_ptr<char[]>> l2lTaskNames;
    std::unique_ptr<char[]> p2mTaskNames;
    std::unique_ptr<char[]> l2pTaskNames;
    std::unique_ptr<char[]> p2pTaskNames;
    std::unique_ptr<char[]> p2pOuterTaskNames;
#else
Martin Khannouz's avatar
Martin Khannouz committed
166
    FStarPUTaskNameParams taskNames;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
167
#endif
168
#endif
169 170 171
#ifdef SCALFMM_STARPU_USE_PRIO
    typedef FStarPUFmmPrioritiesV2 PrioClass;// FStarPUFmmPriorities
#endif
172
	std::list<char*> taskName;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
173

174
public:
175 176
    FGroupTaskStarPUMpiAlgorithm(const FMpi::FComm& inComm, OctreeClass*const inTree, KernelClass* inKernels)
        :   comm(inComm), tree(inTree), originalCpuKernel(inKernels),
177
          cellHandles(nullptr),
178 179
          noCommuteAtLastLevel(FEnv::GetBool("SCALFMM_NO_COMMUTE_LAST_L2L", true)),
          noCommuteBetweenLevel(FEnv::GetBool("SCALFMM_NO_COMMUTE_M2L_L2L", false)),
BRAMAS Berenger's avatar
BRAMAS Berenger committed
180 181 182 183 184 185 186 187
      #ifdef STARPU_USE_CPU
          cpuWrapper(tree->getHeight()),
      #endif
      #ifdef SCALFMM_ENABLE_CUDA_KERNEL
          cudaWrapper(tree->getHeight()),
      #endif
      #ifdef SCALFMM_ENABLE_OPENCL_KERNEL
          openclWrapper(tree->getHeight()),
Martin Khannouz's avatar
Martin Khannouz committed
188 189
      #endif
	  #ifdef STARPU_USE_TASK_NAME
190 191 192
	  #ifdef SCALFMM_SIMGRID_TASKNAMEPARAMS
         taskNames(inComm.processId(), inComm.processCount()),
      #endif
BRAMAS Berenger's avatar
BRAMAS Berenger committed
193 194
      #endif
          wrapperptr(&wrappers){
195 196 197
        FAssertLF(tree, "tree cannot be null");
        FAssertLF(inKernels, "kernels cannot be null");

198 199
        FAbstractAlgorithm::setNbLevelsInTree(tree->getHeight());

200 201
        struct starpu_conf conf;
        FAssertLF(starpu_conf_init(&conf) == 0);
202
#ifdef SCALFMM_STARPU_USE_PRIO
203
        PrioClass::Controller().init(&conf, tree->getHeight(), inKernels);
204
#endif
205
        FAssertLF(starpu_init(&conf) == 0);
206
        FAssertLF(starpu_mpi_init ( 0, 0, 0 ) == 0);
207

BRAMAS Berenger's avatar
BRAMAS Berenger committed
208 209
        starpu_malloc_set_align(32);

210 211
        starpu_pthread_mutex_t initMutex;
        starpu_pthread_mutex_init(&initMutex, NULL);
212
#ifdef STARPU_USE_CPU
213 214 215 216 217
        FStarPUUtils::ExecOnWorkers(STARPU_CPU, [&](){
            starpu_pthread_mutex_lock(&initMutex);
            cpuWrapper.initKernel(starpu_worker_get_id(), inKernels);
            starpu_pthread_mutex_unlock(&initMutex);
        });
218
        wrappers.set(FSTARPU_CPU_IDX, &cpuWrapper);
219
#endif
220
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
221 222 223 224 225 226 227
        FStarPUUtils::ExecOnWorkers(STARPU_CUDA, [&](){
            starpu_pthread_mutex_lock(&initMutex);
            cudaWrapper.initKernel(starpu_worker_get_id(), inKernels);
            starpu_pthread_mutex_unlock(&initMutex);
        });
        wrappers.set(FSTARPU_CUDA_IDX, &cudaWrapper);
#endif
228
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
229 230 231 232 233 234
        FStarPUUtils::ExecOnWorkers(STARPU_OPENCL, [&](){
            starpu_pthread_mutex_lock(&initMutex);
            openclWrapper.initKernel(starpu_worker_get_id(), inKernels);
            starpu_pthread_mutex_unlock(&initMutex);
        });
        wrappers.set(FSTARPU_OPENCL_IDX, &openclWrapper);
235
#endif
236 237
        starpu_pthread_mutex_destroy(&initMutex);

238 239
        starpu_pause();

240
        cellHandles   = new std::vector<CellHandles>[tree->getHeight()];
241

242
#ifdef STARPU_SUPPORT_ARBITER
BRAMAS Berenger's avatar
BRAMAS Berenger committed
243
        arbiterGlobal = starpu_arbiter_create();
244
#endif
BRAMAS Berenger's avatar
BRAMAS Berenger committed
245 246
        initCodelet();
        initCodeletMpi();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
247
        rebuildInteractions();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
248

BRAMAS Berenger's avatar
BRAMAS Berenger committed
249
        FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max Worker " << starpu_worker_get_count() << ")\n");
BRAMAS Berenger's avatar
BRAMAS Berenger committed
250
#ifdef STARPU_USE_CPU
BRAMAS Berenger's avatar
BRAMAS Berenger committed
251
        FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max CPU " << starpu_cpu_worker_get_count() << ")\n");
BRAMAS Berenger's avatar
BRAMAS Berenger committed
252
#endif
253
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
BRAMAS Berenger's avatar
BRAMAS Berenger committed
254 255
        FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max OpenCL " << starpu_opencl_worker_get_count() << ")\n");
#endif
256
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
BRAMAS Berenger's avatar
BRAMAS Berenger committed
257 258
        FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max CUDA " << starpu_cuda_worker_get_count() << ")\n");
#endif
259 260
        FLOG(FLog::Controller << "SCALFMM_NO_COMMUTE_LAST_L2L " << noCommuteAtLastLevel << "\n");
        FLOG(FLog::Controller << "SCALFMM_NO_COMMUTE_M2L_L2L " << noCommuteBetweenLevel << "\n");
BRAMAS Berenger's avatar
BRAMAS Berenger committed
261 262 263 264 265

        buildTaskNames();
    }

    void buildTaskNames(){
266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293
#ifdef STARPU_USE_TASK_NAME
#ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
        const int namesLength = 128;
        m2mTaskNames.resize(tree->getHeight());
        m2lTaskNames.resize(tree->getHeight());
        m2lOuterTaskNames.resize(tree->getHeight());
        l2lTaskNames.resize(tree->getHeight());
        for(int idxLevel = 0 ; idxLevel < tree->getHeight() ; ++idxLevel){
            m2mTaskNames[idxLevel].reset(new char[namesLength]);
            snprintf(m2mTaskNames[idxLevel].get(), namesLength, "M2M-level-%d", idxLevel);
            m2lTaskNames[idxLevel].reset(new char[namesLength]);
            snprintf(m2lTaskNames[idxLevel].get(), namesLength, "M2L-level-%d", idxLevel);
            m2lOuterTaskNames[idxLevel].reset(new char[namesLength]);
            snprintf(m2lOuterTaskNames[idxLevel].get(), namesLength, "M2L-out-level-%d", idxLevel);
            l2lTaskNames[idxLevel].reset(new char[namesLength]);
            snprintf(l2lTaskNames[idxLevel].get(), namesLength, "L2L-level-%d", idxLevel);
        }

        p2mTaskNames.reset(new char[namesLength]);
        snprintf(p2mTaskNames.get(), namesLength, "P2M");
        l2pTaskNames.reset(new char[namesLength]);
        snprintf(l2pTaskNames.get(), namesLength, "L2P");
        p2pTaskNames.reset(new char[namesLength]);
        snprintf(p2pTaskNames.get(), namesLength, "P2P");
        p2pOuterTaskNames.reset(new char[namesLength]);
        snprintf(p2pOuterTaskNames.get(), namesLength, "P2P-out");
#endif
#endif
294 295
    }

296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316
    void syncData(){
        for(int idxLevel = 0 ; idxLevel < tree->getHeight() ; ++idxLevel){
            for(int idxHandle = 0 ; idxHandle < int(cellHandles[idxLevel].size()) ; ++idxHandle){
                starpu_data_acquire(cellHandles[idxLevel][idxHandle].symb, STARPU_R);
                starpu_data_release(cellHandles[idxLevel][idxHandle].symb);
                starpu_data_acquire(cellHandles[idxLevel][idxHandle].up, STARPU_R);
                starpu_data_release(cellHandles[idxLevel][idxHandle].up);
                starpu_data_acquire(cellHandles[idxLevel][idxHandle].down, STARPU_R);
                starpu_data_release(cellHandles[idxLevel][idxHandle].down);
            }
        }
        {
            for(int idxHandle = 0 ; idxHandle < int(particleHandles.size()) ; ++idxHandle){
                starpu_data_acquire(particleHandles[idxHandle].symb, STARPU_R);
                starpu_data_release(particleHandles[idxHandle].symb);
                starpu_data_acquire(particleHandles[idxHandle].down, STARPU_R);
                starpu_data_release(particleHandles[idxHandle].down);
            }
        }
    }

317
    ~FGroupTaskStarPUMpiAlgorithm(){
BRAMAS Berenger's avatar
BRAMAS Berenger committed
318 319
        starpu_resume();

320 321
		std::cout << "Loutre " << comm.processId() << std::endl;
		comm.barrier();
322
        cleanHandle();
323 324
		std::cout << "Canard " << comm.processId() << std::endl;
		comm.barrier();
325
        cleanHandleMpi();
326 327
		std::cout << "Suricate " << comm.processId() << std::endl;
		comm.barrier();
328
        delete[] cellHandles;
329

BRAMAS Berenger's avatar
BRAMAS Berenger committed
330 331 332 333 334 335 336 337 338 339
        starpu_pthread_mutex_t releaseMutex;
        starpu_pthread_mutex_init(&releaseMutex, NULL);
#ifdef STARPU_USE_CPU
        FStarPUUtils::ExecOnWorkers(STARPU_CPU, [&](){
            starpu_pthread_mutex_lock(&releaseMutex);
            cpuWrapper.releaseKernel(starpu_worker_get_id());
            starpu_pthread_mutex_unlock(&releaseMutex);
        });
        wrappers.set(FSTARPU_CPU_IDX, &cpuWrapper);
#endif
340
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
BRAMAS Berenger's avatar
BRAMAS Berenger committed
341 342 343 344 345 346 347
        FStarPUUtils::ExecOnWorkers(STARPU_CUDA, [&](){
            starpu_pthread_mutex_lock(&releaseMutex);
            cudaWrapper.releaseKernel(starpu_worker_get_id());
            starpu_pthread_mutex_unlock(&releaseMutex);
        });
        wrappers.set(FSTARPU_CUDA_IDX, &cudaWrapper);
#endif
348
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
BRAMAS Berenger's avatar
BRAMAS Berenger committed
349 350 351 352 353 354 355 356 357
        FStarPUUtils::ExecOnWorkers(STARPU_OPENCL, [&](){
            starpu_pthread_mutex_lock(&releaseMutex);
            openclWrapper.releaseKernel(starpu_worker_get_id());
            starpu_pthread_mutex_unlock(&releaseMutex);
        });
        wrappers.set(FSTARPU_OPENCL_IDX, &openclWrapper);
#endif
        starpu_pthread_mutex_destroy(&releaseMutex);

358 359

#ifdef STARPU_SUPPORT_ARBITER
BRAMAS Berenger's avatar
BRAMAS Berenger committed
360
        starpu_arbiter_destroy(arbiterGlobal);
361
#endif
362 363
		for(char* ptr : taskName)
			free(ptr);
364
        starpu_mpi_shutdown();
365 366 367
        starpu_shutdown();
    }

BRAMAS Berenger's avatar
BRAMAS Berenger committed
368
    void rebuildInteractions(){
369
        FAssertLF(getenv("OMP_WAIT_POLICY") == nullptr
BRAMAS Berenger's avatar
BRAMAS Berenger committed
370 371
                || strcmp(getenv("OMP_WAIT_POLICY"), "PASSIVE") == 0
                  || strcmp(getenv("OMP_WAIT_POLICY"), "passive") == 0);
372

BRAMAS Berenger's avatar
BRAMAS Berenger committed
373 374
#pragma omp parallel
#pragma omp single
375 376 377
        buildExternalInteractionVecs();
        buildHandles();

BRAMAS Berenger's avatar
BRAMAS Berenger committed
378 379
#pragma omp parallel
#pragma omp single
380
        buildRemoteInteractionsAndHandles();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
381 382
    }

383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426
#ifdef STARPU_USE_CPU
    void forEachCpuWorker(std::function<void(void)> func){
        starpu_resume();
        FStarPUUtils::ExecOnWorkers(STARPU_CPU, func);
        starpu_pause();
    }

    void forEachCpuWorker(std::function<void(KernelClass*)> func){
        starpu_resume();
        FStarPUUtils::ExecOnWorkers(STARPU_CPU, [&](){
            func(cpuWrapper.getKernel(starpu_worker_get_id()));
        });
        starpu_pause();
    }
#endif
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
    void forEachCudaWorker(std::function<void(void)> func){
        starpu_resume();
        FStarPUUtils::ExecOnWorkers(STARPU_CUDA, func);
        starpu_pause();
    }
    void forEachCudaWorker(std::function<void(void*)> func){
        starpu_resume();
        FStarPUUtils::ExecOnWorkers(STARPU_CUDA, [&](){
            func(cudaWrapper.getKernel(starpu_worker_get_id()));
        });
        starpu_pause();
    }
#endif
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
    void forEachOpenCLWorker(std::function<void(void)> func){
        starpu_resume();
        FStarPUUtils::ExecOnWorkers(STARPU_OPENCL, func);
        starpu_pause();
    }
    void forEachOpenCLWorker(std::function<void(void*)> func){
        starpu_resume();
        FStarPUUtils::ExecOnWorkers(STARPU_OPENCL, [&](){
            func(openclWrapper.getKernel(starpu_worker_get_id()));
        });
        starpu_pause();
    }
#endif

BRAMAS Berenger's avatar
BRAMAS Berenger committed
427
protected:
428

BRAMAS Berenger's avatar
BRAMAS Berenger committed
429 430 431 432 433 434 435 436 437 438 439
    /**
      * Runs the complete algorithm.
      */
    void executeCore(const unsigned operationsToProceed) override {
        FLOG( FLog::Controller << "\tStart FGroupTaskStarPUMpiAlgorithm\n" );
        const bool directOnly = (tree->getHeight() <= 2);

#ifdef STARPU_USE_CPU
        FTIME_TASKS(cpuWrapper.taskTimeRecorder.start());
#endif

440 441
        FLOG(FTic timerSoumission);

442
        starpu_resume();
443
        postRecvAllocatedBlocks();
444

445
        if( operationsToProceed & FFmmP2P ) insertParticlesSend();
446

447 448 449
        if( operationsToProceed & FFmmP2P ) directPass();
        if( operationsToProceed & FFmmP2P ) directPassMpi();

450
        if(operationsToProceed & FFmmP2M && !directOnly) bottomPass();
451

BRAMAS Berenger's avatar
BRAMAS Berenger committed
452
        if(operationsToProceed & FFmmM2M && !directOnly) upwardPass();
453
        if(operationsToProceed & FFmmM2L && !directOnly) insertCellsSend();
454

BRAMAS Berenger's avatar
BRAMAS Berenger committed
455 456 457
        if(operationsToProceed & FFmmM2L && !directOnly) transferPass(FAbstractAlgorithm::upperWorkingLevel, FAbstractAlgorithm::lowerWorkingLevel-1 , true, true);
        if(operationsToProceed & FFmmM2L && !directOnly) transferPass(FAbstractAlgorithm::lowerWorkingLevel-1, FAbstractAlgorithm::lowerWorkingLevel, false, false);
        if(operationsToProceed & FFmmM2L && !directOnly) transferPassMpi();
458

BRAMAS Berenger's avatar
BRAMAS Berenger committed
459
        if(operationsToProceed & FFmmL2L && !directOnly) downardPass();
460

BRAMAS Berenger's avatar
BRAMAS Berenger committed
461
        if(operationsToProceed & FFmmM2L && !directOnly) transferPass(FAbstractAlgorithm::lowerWorkingLevel-1, FAbstractAlgorithm::lowerWorkingLevel, true, true);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
462

BRAMAS Berenger's avatar
BRAMAS Berenger committed
463 464 465 466
        if( operationsToProceed & FFmmL2P && !directOnly) mergePass();
#ifdef STARPU_USE_REDUX
        if( operationsToProceed & FFmmL2P && !directOnly) readParticle();
#endif
467

468
        FLOG( FLog::Controller << "\t\t Submitting the tasks took " << timerSoumission.tacAndElapsed() << "s\n" );
469
        starpu_task_wait_for_all();
470 471 472 473 474

        FLOG( FTic timerSync; );
        syncData();
        FLOG( FLog::Controller << "\t\t Moving data to the host took " << timerSync.tacAndElapsed() << "s\n" );

475
        starpu_pause();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
476 477 478 479 480

#ifdef STARPU_USE_CPU
        FTIME_TASKS(cpuWrapper.taskTimeRecorder.end());
        FTIME_TASKS(cpuWrapper.taskTimeRecorder.saveToDisk("/tmp/taskstime-FGroupTaskStarPUAlgorithm.txt"));
#endif
481
    }
482

483 484
    void initCodelet(){
        memset(&p2m_cl, 0, sizeof(p2m_cl));
485
#ifdef STARPU_USE_CPU
486
        if(originalCpuKernel->supportP2M(FSTARPU_CPU_IDX)){
487 488 489
            p2m_cl.cpu_funcs[0] = StarPUCpuWrapperClass::bottomPassCallback;
            p2m_cl.where |= STARPU_CPU;
        }
490
#endif
491
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
492
        if(originalCpuKernel->supportP2M(FSTARPU_CUDA_IDX)){
493 494 495 496
            p2m_cl.cuda_funcs[0] = StarPUCudaWrapperClass::bottomPassCallback;
            p2m_cl.where |= STARPU_CUDA;
        }
#endif
497
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
498
        if(originalCpuKernel->supportP2M(FSTARPU_OPENCL_IDX)){
499 500 501
            p2m_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::bottomPassCallback;
            p2m_cl.where |= STARPU_OPENCL;
        }
502
#endif
503 504 505 506
        p2m_cl.nbuffers = 3;
        p2m_cl.modes[0] = STARPU_R;
        p2m_cl.modes[1] = STARPU_RW;
        p2m_cl.modes[2] = STARPU_R;
507
        p2m_cl.name = "p2m_cl";
508

BRAMAS Berenger's avatar
BRAMAS Berenger committed
509
        memset(&m2m_cl, 0, sizeof(m2m_cl));
510
#ifdef STARPU_USE_CPU
BRAMAS Berenger's avatar
BRAMAS Berenger committed
511 512 513 514
        if(originalCpuKernel->supportM2M(FSTARPU_CPU_IDX)){
            m2m_cl.cpu_funcs[0] = StarPUCpuWrapperClass::upwardPassCallback;
            m2m_cl.where |= STARPU_CPU;
        }
515
#endif
516
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
BRAMAS Berenger's avatar
BRAMAS Berenger committed
517 518 519 520
        if(originalCpuKernel->supportM2M(FSTARPU_CUDA_IDX)){
            m2m_cl.cuda_funcs[0] = StarPUCudaWrapperClass::upwardPassCallback;
            m2m_cl.where |= STARPU_CUDA;
        }
521
#endif
522
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
BRAMAS Berenger's avatar
BRAMAS Berenger committed
523 524 525 526
        if(originalCpuKernel->supportM2M(FSTARPU_OPENCL_IDX)){
            m2m_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::upwardPassCallback;
            m2m_cl.where |= STARPU_OPENCL;
        }
527
#endif
BRAMAS Berenger's avatar
BRAMAS Berenger committed
528 529 530 531 532 533 534 535 536
        m2m_cl.nbuffers = 4;
        m2m_cl.dyn_modes = (starpu_data_access_mode*)malloc(m2m_cl.nbuffers*sizeof(starpu_data_access_mode));
        m2m_cl.dyn_modes[0] = STARPU_R;
        m2m_cl.dyn_modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
        m2m_cl.name = "m2m_cl";
        m2m_cl.dyn_modes[2] = STARPU_R;
        m2m_cl.dyn_modes[3] = STARPU_R;

        memset(&l2l_cl, 0, sizeof(l2l_cl));
537
#ifdef STARPU_USE_CPU
BRAMAS Berenger's avatar
BRAMAS Berenger committed
538 539 540 541
        if(originalCpuKernel->supportL2L(FSTARPU_CPU_IDX)){
            l2l_cl.cpu_funcs[0] = StarPUCpuWrapperClass::downardPassCallback;
            l2l_cl.where |= STARPU_CPU;
        }
542
#endif
543
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
BRAMAS Berenger's avatar
BRAMAS Berenger committed
544 545 546 547
        if(originalCpuKernel->supportL2L(FSTARPU_CUDA_IDX)){
            l2l_cl.cuda_funcs[0] = StarPUCudaWrapperClass::downardPassCallback;
            l2l_cl.where |= STARPU_CUDA;
        }
548
#endif
549
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
BRAMAS Berenger's avatar
BRAMAS Berenger committed
550 551 552
        if(originalCpuKernel->supportL2L(FSTARPU_OPENCL_IDX)){
            l2l_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::downardPassCallback;
            l2l_cl.where |= STARPU_OPENCL;
553
        }
BRAMAS Berenger's avatar
BRAMAS Berenger committed
554 555 556 557 558 559 560 561
#endif
        l2l_cl.nbuffers = 4;
        l2l_cl.dyn_modes = (starpu_data_access_mode*)malloc(l2l_cl.nbuffers*sizeof(starpu_data_access_mode));
        l2l_cl.dyn_modes[0] = STARPU_R;
        l2l_cl.dyn_modes[1] = STARPU_R;
        l2l_cl.name = "l2l_cl";
        l2l_cl.dyn_modes[2] = STARPU_R;
        l2l_cl.dyn_modes[3] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
562

BRAMAS Berenger's avatar
BRAMAS Berenger committed
563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589
        memset(&l2l_cl_nocommute, 0, sizeof(l2l_cl_nocommute));
#ifdef STARPU_USE_CPU
        if(originalCpuKernel->supportL2L(FSTARPU_CPU_IDX)){
            l2l_cl_nocommute.cpu_funcs[0] = StarPUCpuWrapperClass::downardPassCallback;
            l2l_cl_nocommute.where |= STARPU_CPU;
        }
#endif
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
        if(originalCpuKernel->supportL2L(FSTARPU_CUDA_IDX)){
            l2l_cl_nocommute.cuda_funcs[0] = StarPUCudaWrapperClass::downardPassCallback;
            l2l_cl_nocommute.where |= STARPU_CUDA;
        }
#endif
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
        if(originalCpuKernel->supportL2L(FSTARPU_OPENCL_IDX)){
            l2l_cl_nocommute.opencl_funcs[0] = StarPUOpenClWrapperClass::downardPassCallback;
            l2l_cl_nocommute.where |= STARPU_OPENCL;
        }
#endif
        l2l_cl_nocommute.nbuffers = 4;
        l2l_cl_nocommute.dyn_modes = (starpu_data_access_mode*)malloc(l2l_cl_nocommute.nbuffers*sizeof(starpu_data_access_mode));
        l2l_cl_nocommute.dyn_modes[0] = STARPU_R;
        l2l_cl_nocommute.dyn_modes[1] = STARPU_R;
        l2l_cl_nocommute.name = "l2l_cl";
        l2l_cl_nocommute.dyn_modes[2] = STARPU_R;
        l2l_cl_nocommute.dyn_modes[3] = STARPU_RW;

590
        memset(&l2p_cl, 0, sizeof(l2p_cl));
591
#ifdef STARPU_USE_CPU
592
        if(originalCpuKernel->supportL2P(FSTARPU_CPU_IDX)){
593 594 595
            l2p_cl.cpu_funcs[0] = StarPUCpuWrapperClass::mergePassCallback;
            l2p_cl.where |= STARPU_CPU;
        }
596
#endif
597
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
598
        if(originalCpuKernel->supportL2P(FSTARPU_CUDA_IDX)){
599 600 601 602
            l2p_cl.cuda_funcs[0] = StarPUCudaWrapperClass::mergePassCallback;
            l2p_cl.where |= STARPU_CUDA;
        }
#endif
603
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
604
        if(originalCpuKernel->supportL2P(FSTARPU_OPENCL_IDX)){
605 606 607
            l2p_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::mergePassCallback;
            l2p_cl.where |= STARPU_OPENCL;
        }
608
#endif
609
        l2p_cl.nbuffers = 4;
610
        l2p_cl.modes[0] = STARPU_R;
611 612
        l2p_cl.modes[1] = STARPU_R;
        l2p_cl.modes[2] = STARPU_R;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
613 614 615
#ifdef STARPU_USE_REDUX
        l2p_cl.modes[3] = STARPU_REDUX;
#else
616
        l2p_cl.modes[3] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
617
#endif
618
        l2p_cl.name = "l2p_cl";
619 620

        memset(&p2p_cl_in, 0, sizeof(p2p_cl_in));
621
#ifdef STARPU_USE_CPU
622
        if(originalCpuKernel->supportP2P(FSTARPU_CPU_IDX)){
623 624 625
            p2p_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::directInPassCallback;
            p2p_cl_in.where |= STARPU_CPU;
        }
626
#endif
627
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
628
        if(originalCpuKernel->supportP2P(FSTARPU_CUDA_IDX)){
629 630 631 632
            p2p_cl_in.cuda_funcs[0] = StarPUCudaWrapperClass::directInPassCallback;
            p2p_cl_in.where |= STARPU_CUDA;
        }
#endif
633
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
634
        if(originalCpuKernel->supportP2P(FSTARPU_OPENCL_IDX)){
635 636 637
            p2p_cl_in.opencl_funcs[0] = StarPUOpenClWrapperClass::directInPassCallback;
            p2p_cl_in.where |= STARPU_OPENCL;
        }
638
#endif
639 640
        p2p_cl_in.nbuffers = 2;
        p2p_cl_in.modes[0] = STARPU_R;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
641 642 643
#ifdef STARPU_USE_REDUX
        p2p_cl_in.modes[1] = STARPU_REDUX;
#else
644
        p2p_cl_in.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
645
#endif
646
        p2p_cl_in.name = "p2p_cl_in";
647
        memset(&p2p_cl_inout, 0, sizeof(p2p_cl_inout));
648
#ifdef STARPU_USE_CPU
649
        if(originalCpuKernel->supportP2PExtern(FSTARPU_CPU_IDX)){
650 651 652
            p2p_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::directInoutPassCallback;
            p2p_cl_inout.where |= STARPU_CPU;
        }
653
#endif
654
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
655
        if(originalCpuKernel->supportP2PExtern(FSTARPU_CUDA_IDX)){
656 657 658 659
            p2p_cl_inout.cuda_funcs[0] = StarPUCudaWrapperClass::directInoutPassCallback;
            p2p_cl_inout.where |= STARPU_CUDA;
        }
#endif
660
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
661
        if(originalCpuKernel->supportP2PExtern(FSTARPU_OPENCL_IDX)){
662 663 664
            p2p_cl_inout.opencl_funcs[0] = StarPUOpenClWrapperClass::directInoutPassCallback;
            p2p_cl_inout.where |= STARPU_OPENCL;
        }
665
#endif
666 667
        p2p_cl_inout.nbuffers = 4;
        p2p_cl_inout.modes[0] = STARPU_R;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
668 669 670
#ifdef STARPU_USE_REDUX
        p2p_cl_inout.modes[1] = STARPU_REDUX;
#else
671
        p2p_cl_inout.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
672
#endif
673
        p2p_cl_inout.modes[2] = STARPU_R;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
674 675 676
#ifdef STARPU_USE_REDUX
        p2p_cl_inout.modes[3] = STARPU_REDUX;
#else
677
        p2p_cl_inout.modes[3] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
678
#endif
679
        p2p_cl_inout.name = "p2p_cl_inout";
680 681

        memset(&m2l_cl_in, 0, sizeof(m2l_cl_in));
682
#ifdef STARPU_USE_CPU
683
        if(originalCpuKernel->supportM2L(FSTARPU_CPU_IDX)){
684 685 686
            m2l_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::transferInPassCallback;
            m2l_cl_in.where |= STARPU_CPU;
        }
687
#endif
688
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
689
        if(originalCpuKernel->supportM2L(FSTARPU_CUDA_IDX)){
690 691 692 693
            m2l_cl_in.cuda_funcs[0] = StarPUCudaWrapperClass::transferInPassCallback;
            m2l_cl_in.where |= STARPU_CUDA;
        }
#endif
694
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
695
        if(originalCpuKernel->supportM2L(FSTARPU_OPENCL_IDX)){
696 697 698
            m2l_cl_in.opencl_funcs[0] = StarPUOpenClWrapperClass::transferInPassCallback;
            m2l_cl_in.where |= STARPU_OPENCL;
        }
699
#endif
700 701
        m2l_cl_in.nbuffers = 3;
        m2l_cl_in.modes[0] = STARPU_R;
702
        m2l_cl_in.modes[1] = STARPU_R;
703
        m2l_cl_in.modes[2] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
704
        m2l_cl_in.name = "m2l_cl_in";
BRAMAS Berenger's avatar
BRAMAS Berenger committed
705

706
        memset(&m2l_cl_inout, 0, sizeof(m2l_cl_inout));
707
#ifdef STARPU_USE_CPU
708
        if(originalCpuKernel->supportM2LExtern(FSTARPU_CPU_IDX)){
709 710 711
            m2l_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::transferInoutPassCallback;
            m2l_cl_inout.where |= STARPU_CPU;
        }
712
#endif
713
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
714
        if(originalCpuKernel->supportM2LExtern(FSTARPU_CUDA_IDX)){
715 716 717 718
            m2l_cl_inout.cuda_funcs[0] = StarPUCudaWrapperClass::transferInoutPassCallback;
            m2l_cl_inout.where |= STARPU_CUDA;
        }
#endif
719
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
720
        if(originalCpuKernel->supportM2LExtern(FSTARPU_OPENCL_IDX)){
721 722 723
            m2l_cl_inout.opencl_funcs[0] = StarPUOpenClWrapperClass::transferInoutPassCallback;
            m2l_cl_inout.where |= STARPU_OPENCL;
        }
724
#endif
BRAMAS Berenger's avatar
BRAMAS Berenger committed
725
        m2l_cl_inout.nbuffers = 4;
726
        m2l_cl_inout.modes[0] = STARPU_R;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
727 728
        m2l_cl_inout.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
        m2l_cl_inout.modes[2] = STARPU_R;
729
        m2l_cl_inout.modes[3] = STARPU_R;
730
        m2l_cl_inout.name = "m2l_cl_inout";
BRAMAS Berenger's avatar
BRAMAS Berenger committed
731

BRAMAS Berenger's avatar
BRAMAS Berenger committed
732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774
#ifdef STARPU_USE_REDUX
        memset(&p2p_redux_init, 0, sizeof(p2p_redux_init));
#ifdef STARPU_USE_CPU
        p2p_redux_init.cpu_funcs[0] = FStarPUReduxCpu::InitData<typename ParticleGroupClass::ParticleDataType>;
        p2p_redux_init.where |= STARPU_CPU;
#endif
        p2p_redux_init.nbuffers = 1;
        p2p_redux_init.modes[0] = STARPU_RW;
        p2p_redux_init.name = "p2p_redux_init";

        memset(&p2p_redux_perform, 0, sizeof(p2p_redux_perform));
#ifdef STARPU_USE_CPU
        p2p_redux_perform.cpu_funcs[0] = FStarPUReduxCpu::ReduceData<typename ParticleGroupClass::ParticleDataType>;
        p2p_redux_perform.where |= STARPU_CPU;
#endif
        p2p_redux_perform.nbuffers = 2;
        p2p_redux_perform.modes[0] = STARPU_RW;
        p2p_redux_perform.modes[1] = STARPU_R;
        p2p_redux_perform.name = "p2p_redux_perform";

        memset(&p2p_redux_read, 0, sizeof(p2p_redux_read));
#ifdef STARPU_USE_CPU
        if(originalCpuKernel->supportL2P(FSTARPU_CPU_IDX)){
            p2p_redux_read.cpu_funcs[0] = FStarPUReduxCpu::EmptyCodelet<typename ParticleGroupClass::ParticleDataType>;
            p2p_redux_read.where |= STARPU_CPU;
        }
#endif
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
        if(originalCpuKernel->supportL2P(FSTARPU_CUDA_IDX)){
            p2p_redux_read.cuda_funcs[0] = FStarPUReduxCpu::EmptyCodelet<typename ParticleGroupClass::ParticleDataType>;
            p2p_redux_read.where |= STARPU_CUDA;
        }
#endif
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
        if(originalCpuKernel->supportL2P(FSTARPU_OPENCL_IDX)){
            p2p_redux_read.opencl_funcs[0] = FStarPUReduxCpu::EmptyCodelet<typename ParticleGroupClass::ParticleDataType>;
            p2p_redux_read.where |= STARPU_OPENCL;
        }
#endif
        p2p_redux_read.nbuffers = 1;
        p2p_redux_read.modes[0] = STARPU_R;
        p2p_redux_read.name = "p2p_redux_read";
#endif
BRAMAS Berenger's avatar
BRAMAS Berenger committed
775
    }
BRAMAS Berenger's avatar
BRAMAS Berenger committed
776

777 778 779
    /** dealloc in a starpu way all the defined handles */
    void cleanHandle(){
        for(int idxLevel = 0 ; idxLevel < tree->getHeight() ; ++idxLevel){
780 781 782 783
            for(int idxHandle = 0 ; idxHandle < int(cellHandles[idxLevel].size()) ; ++idxHandle){
                starpu_data_unregister(cellHandles[idxLevel][idxHandle].symb);
                starpu_data_unregister(cellHandles[idxLevel][idxHandle].up);
                starpu_data_unregister(cellHandles[idxLevel][idxHandle].down);
784
            }
785
            cellHandles[idxLevel].clear();
786 787
        }
        {
788 789 790
            for(int idxHandle = 0 ; idxHandle < int(particleHandles.size()) ; ++idxHandle){
                starpu_data_unregister(particleHandles[idxHandle].symb);
                starpu_data_unregister(particleHandles[idxHandle].down);
791
            }
792
            particleHandles.clear();
793 794 795 796
        }
    }

    ////////////////////////////////////////////////////////////////////////////
797 798

    void initCodeletMpi(){
799
        memset(&p2p_cl_inout_mpi, 0, sizeof(p2p_cl_inout_mpi));
800
#ifdef STARPU_USE_CPU
801
        if(originalCpuKernel->supportP2PMpi(FSTARPU_CPU_IDX)){
802 803 804
            p2p_cl_inout_mpi.where |= STARPU_CPU;
            p2p_cl_inout_mpi.cpu_funcs[0] = StarPUCpuWrapperClass::directInoutPassCallbackMpi;
        }
805
#endif
806
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
807
        if(originalCpuKernel->supportP2PMpi(FSTARPU_CUDA_IDX)){
808 809 810 811
            p2p_cl_inout_mpi.where |= STARPU_CUDA;
            p2p_cl_inout_mpi.cuda_funcs[0] = StarPUCudaWrapperClass::directInoutPassCallbackMpi;
        }
#endif
812
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
813
        if(originalCpuKernel->supportP2PMpi(FSTARPU_OPENCL_IDX)){
814 815 816
            p2p_cl_inout_mpi.where |= STARPU_OPENCL;
            p2p_cl_inout_mpi.opencl_funcs[0] = StarPUOpenClWrapperClass::directInoutPassCallbackMpi;
        }
817
#endif
818 819
        p2p_cl_inout_mpi.nbuffers = 3;
        p2p_cl_inout_mpi.modes[0] = STARPU_R;
820
        p2p_cl_inout_mpi.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
821
        p2p_cl_inout_mpi.modes[2] = STARPU_R;
822
        p2p_cl_inout_mpi.name = "p2p_cl_inout_mpi";
823

824
        memset(&m2l_cl_inout_mpi, 0, sizeof(m2l_cl_inout_mpi));
825
#ifdef STARPU_USE_CPU
826
        if(originalCpuKernel->supportM2LMpi(FSTARPU_CPU_IDX)){
827 828 829
            m2l_cl_inout_mpi.where |= STARPU_CPU;
            m2l_cl_inout_mpi.cpu_funcs[0] = StarPUCpuWrapperClass::transferInoutPassCallbackMpi;
        }
830
#endif
831
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
832
        if(originalCpuKernel->supportM2LMpi(FSTARPU_CUDA_IDX)){
833 834 835 836
            m2l_cl_inout_mpi.where |= STARPU_CUDA;
            m2l_cl_inout_mpi.cuda_funcs[0] = StarPUCudaWrapperClass::transferInoutPassCallbackMpi;
        }
#endif
837
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
838
        if(originalCpuKernel->supportM2LMpi(FSTARPU_OPENCL_IDX)){
839 840 841
            m2l_cl_inout_mpi.where |= STARPU_OPENCL;
            m2l_cl_inout_mpi.opencl_funcs[0] = StarPUOpenClWrapperClass::transferInoutPassCallbackMpi;
        }
842
#endif
843 844
        m2l_cl_inout_mpi.nbuffers = 4;
        m2l_cl_inout_mpi.modes[0] = STARPU_R;
845
        m2l_cl_inout_mpi.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
846 847
        m2l_cl_inout_mpi.modes[2] = STARPU_R;
        m2l_cl_inout_mpi.modes[3] = STARPU_R;
848
        m2l_cl_inout_mpi.name = "m2l_cl_inout_mpi";
849 850
    }

851 852 853 854 855 856
    std::vector<std::pair<MortonIndex,MortonIndex>> processesIntervalPerLevels;
    struct BlockDescriptor{
        MortonIndex firstIndex;
        MortonIndex lastIndex;
        int owner;
        int bufferSize;