FGroupTaskStarpuAlgorithm.hpp 42.8 KB
Newer Older
1 2
// Keep in private GIT
// @SCALFMM_PRIVATE
BRAMAS Berenger's avatar
BRAMAS Berenger committed
3 4 5
#ifndef FGROUPTASKSTARPUALGORITHM_HPP
#define FGROUPTASKSTARPUALGORITHM_HPP

6 7 8 9 10 11 12
#include "../../Utils/FGlobal.hpp"
#include "../../Core/FCoreCommon.hpp"
#include "../../Utils/FQuickSort.hpp"
#include "../../Containers/FTreeCoordinate.hpp"
#include "../../Utils/FLog.hpp"
#include "../../Utils/FTic.hpp"
#include "../../Utils/FAssert.hpp"
BRAMAS Berenger's avatar
BRAMAS Berenger committed
13

14 15
#include "FOutOfBlockInteraction.hpp"

BRAMAS Berenger's avatar
BRAMAS Berenger committed
16 17 18 19 20 21
#include <vector>
#include <vector>

#include <omp.h>

#include <starpu.h>
22
#include "../StarPUUtils/FStarPUUtils.hpp"
BRAMAS Berenger's avatar
BRAMAS Berenger committed
23

24
#ifdef STARPU_USE_CPU
25
#include "../StarPUUtils/FStarPUCpuWrapper.hpp"
26
#endif
27
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
28 29 30 31 32
#include "../StarPUUtils/FStarPUCudaWrapper.hpp"
#include "../Cuda/FCudaEmptyKernel.hpp"
#include "../Cuda/FCudaGroupAttachedLeaf.hpp"
#include "../Cuda/FCudaGroupOfParticles.hpp"
#include "../Cuda/FCudaGroupOfCells.hpp"
33
#include "../Cuda/FCudaEmptyCellSymb.hpp"
34
#endif
35
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
36 37
#include "../StarPUUtils/FStarPUOpenClWrapper.hpp"
#include "../OpenCl/FOpenCLDeviceWrapper.hpp"
38 39
#endif

40

41
template <class OctreeClass, class CellContainerClass, class KernelClass, class ParticleGroupClass, class StarPUCpuWrapperClass
42
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
43
    , class StarPUCudaWrapperClass = FStarPUCudaWrapper<KernelClass, FCudaEmptyCellSymb, int, int, FCudaGroupOfCells<FCudaEmptyCellSymb, int, int>,
BRAMAS Berenger's avatar
BRAMAS Berenger committed
44
                                                        FCudaGroupOfParticles<int, 0, 0, int>, FCudaGroupAttachedLeaf<int, 0, 0, int>, FCudaEmptyKernel<int> >
45
#endif
46
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
47
    , class StarPUOpenClWrapperClass = FStarPUOpenClWrapper<KernelClass, FOpenCLDeviceWrapper<KernelClass>>
48 49
#endif
          >
BRAMAS Berenger's avatar
BRAMAS Berenger committed
50 51
class FGroupTaskStarPUAlgorithm {
protected:
52
    typedef FGroupTaskStarPUAlgorithm<OctreeClass, CellContainerClass, KernelClass, ParticleGroupClass, StarPUCpuWrapperClass
53
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
54
        , StarPUCudaWrapperClass
55
#endif
56
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
57
    , StarPUOpenClWrapperClass
58 59
#endif
    > ThisClass;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
60 61 62 63 64 65 66 67

    template <class OtherBlockClass>
    struct BlockInteractions{
        OtherBlockClass* otherBlock;
        int otherBlockId;
        std::vector<OutOfBlockInteraction> interactions;
    };

68 69 70 71 72 73 74 75 76 77 78
    struct CellHandles{
        starpu_data_handle_t symb;
        starpu_data_handle_t up;
        starpu_data_handle_t down;
    };

    struct ParticleHandles{
        starpu_data_handle_t symb;
        starpu_data_handle_t down;
    };

BRAMAS Berenger's avatar
BRAMAS Berenger committed
79 80 81
    std::vector< std::vector< std::vector<BlockInteractions<CellContainerClass>>>> externalInteractionsAllLevel;
    std::vector< std::vector<BlockInteractions<ParticleGroupClass>>> externalInteractionsLeafLevel;

82
    int MaxThreads;         //< The number of threads
BRAMAS Berenger's avatar
BRAMAS Berenger committed
83
    OctreeClass*const tree;       //< The Tree
84
    KernelClass*const originalCpuKernel;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
85

86 87
    std::vector<CellHandles>* cellHandles;
    std::vector<ParticleHandles> particleHandles;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
88 89 90 91 92 93 94 95 96 97 98 99

    starpu_codelet p2m_cl;
    starpu_codelet m2m_cl[9];
    starpu_codelet l2l_cl[9];
    starpu_codelet l2p_cl;

    starpu_codelet m2l_cl_in;
    starpu_codelet m2l_cl_inout;

    starpu_codelet p2p_cl_in;
    starpu_codelet p2p_cl_inout;

100
#ifdef STARPU_USE_CPU    
101
    StarPUCpuWrapperClass cpuWrapper;
102
#endif
103
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
104 105
    StarPUCudaWrapperClass cudaWrapper;
#endif
106
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
107 108
    StarPUOpenClWrapperClass openclWrapper;
#endif
109 110 111

    FStarPUPtrInterface wrappers;
    FStarPUPtrInterface* wrapperptr;
112

BRAMAS Berenger's avatar
BRAMAS Berenger committed
113 114
public:
    FGroupTaskStarPUAlgorithm(OctreeClass*const inTree, KernelClass* inKernels, const int inMaxThreads = -1)
115
        : MaxThreads(inMaxThreads), tree(inTree), originalCpuKernel(inKernels),
116
          cellHandles(nullptr),
117 118
#ifdef STARPU_USE_CPU
            cpuWrapper(tree->getHeight()),
119
#endif
120
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
121 122
            cudaWrapper(tree->getHeight()),
#endif
123
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
124
            openclWrapper(tree->getHeight()),
125 126
#endif
            wrapperptr(&wrappers){
BRAMAS Berenger's avatar
BRAMAS Berenger committed
127 128
        FAssertLF(tree, "tree cannot be null");
        FAssertLF(inKernels, "kernels cannot be null");
129
        FAssertLF(MaxThreads <= STARPU_MAXCPUS, "number of threads to high");
BRAMAS Berenger's avatar
BRAMAS Berenger committed
130

131 132
        struct starpu_conf conf;
        FAssertLF(starpu_conf_init(&conf) == 0);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
133
        // conf.ncpus = MaxThreads;
134
        FAssertLF(starpu_init(&conf) == 0);
135 136 137

        starpu_pthread_mutex_t initMutex;
        starpu_pthread_mutex_init(&initMutex, NULL);
138
#ifdef STARPU_USE_CPU
139 140 141 142 143
        FStarPUUtils::ExecOnWorkers(STARPU_CPU, [&](){
            starpu_pthread_mutex_lock(&initMutex);
            cpuWrapper.initKernel(starpu_worker_get_id(), inKernels);
            starpu_pthread_mutex_unlock(&initMutex);
        });
144
        wrappers.set(FSTARPU_CPU_IDX, &cpuWrapper);
145
#endif
146
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
147 148 149 150 151 152 153
        FStarPUUtils::ExecOnWorkers(STARPU_CUDA, [&](){
            starpu_pthread_mutex_lock(&initMutex);
            cudaWrapper.initKernel(starpu_worker_get_id(), inKernels);
            starpu_pthread_mutex_unlock(&initMutex);
        });
        wrappers.set(FSTARPU_CUDA_IDX, &cudaWrapper);
#endif
154
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
155 156 157 158 159 160
        FStarPUUtils::ExecOnWorkers(STARPU_OPENCL, [&](){
            starpu_pthread_mutex_lock(&initMutex);
            openclWrapper.initKernel(starpu_worker_get_id(), inKernels);
            starpu_pthread_mutex_unlock(&initMutex);
        });
        wrappers.set(FSTARPU_OPENCL_IDX, &openclWrapper);
161
#endif
162 163
        starpu_pthread_mutex_destroy(&initMutex);

164 165 166 167
        starpu_pause();

        MaxThreads = starpu_worker_get_count();//starpu_cpu_worker_get_count();

168
        cellHandles   = new std::vector<CellHandles>[tree->getHeight()];
169

170
        initCodelet();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
171

BRAMAS Berenger's avatar
BRAMAS Berenger committed
172
        FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max Worker " << starpu_worker_get_count() << ")\n");
BRAMAS Berenger's avatar
BRAMAS Berenger committed
173
#ifdef STARPU_USE_CPU
BRAMAS Berenger's avatar
BRAMAS Berenger committed
174
        FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max CPU " << starpu_cpu_worker_get_count() << ")\n");
BRAMAS Berenger's avatar
BRAMAS Berenger committed
175
#endif
176
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
BRAMAS Berenger's avatar
BRAMAS Berenger committed
177 178
        FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max OpenCL " << starpu_opencl_worker_get_count() << ")\n");
#endif
179
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
BRAMAS Berenger's avatar
BRAMAS Berenger committed
180 181
        FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max CUDA " << starpu_cuda_worker_get_count() << ")\n");
#endif
BRAMAS Berenger's avatar
BRAMAS Berenger committed
182 183 184
    }

    ~FGroupTaskStarPUAlgorithm(){
BRAMAS Berenger's avatar
BRAMAS Berenger committed
185 186
        starpu_resume();

BRAMAS Berenger's avatar
BRAMAS Berenger committed
187
        cleanHandle();
188
        delete[] cellHandles;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
189

BRAMAS Berenger's avatar
BRAMAS Berenger committed
190 191 192 193 194 195 196 197 198 199
        starpu_pthread_mutex_t releaseMutex;
        starpu_pthread_mutex_init(&releaseMutex, NULL);
#ifdef STARPU_USE_CPU
        FStarPUUtils::ExecOnWorkers(STARPU_CPU, [&](){
            starpu_pthread_mutex_lock(&releaseMutex);
            cpuWrapper.releaseKernel(starpu_worker_get_id());
            starpu_pthread_mutex_unlock(&releaseMutex);
        });
        wrappers.set(FSTARPU_CPU_IDX, &cpuWrapper);
#endif
200
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
BRAMAS Berenger's avatar
BRAMAS Berenger committed
201 202 203 204 205 206 207
        FStarPUUtils::ExecOnWorkers(STARPU_CUDA, [&](){
            starpu_pthread_mutex_lock(&releaseMutex);
            cudaWrapper.releaseKernel(starpu_worker_get_id());
            starpu_pthread_mutex_unlock(&releaseMutex);
        });
        wrappers.set(FSTARPU_CUDA_IDX, &cudaWrapper);
#endif
208
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
BRAMAS Berenger's avatar
BRAMAS Berenger committed
209 210 211 212 213 214 215 216 217
        FStarPUUtils::ExecOnWorkers(STARPU_OPENCL, [&](){
            starpu_pthread_mutex_lock(&releaseMutex);
            openclWrapper.releaseKernel(starpu_worker_get_id());
            starpu_pthread_mutex_unlock(&releaseMutex);
        });
        wrappers.set(FSTARPU_OPENCL_IDX, &openclWrapper);
#endif
        starpu_pthread_mutex_destroy(&releaseMutex);

BRAMAS Berenger's avatar
BRAMAS Berenger committed
218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233
        starpu_shutdown();
    }

    void execute(const unsigned operationsToProceed = FFmmNearAndFarFields){
        FLOG( FLog::Controller << "\tStart FGroupTaskStarPUAlgorithm\n" );

        #pragma omp parallel
        #pragma omp single
        buildExternalInteractionVecs();

        buildHandles();

        starpu_resume();

        if(operationsToProceed & FFmmP2M) bottomPass();

234
        if(operationsToProceed & FFmmM2M) upwardPass();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
235

236
        if(operationsToProceed & FFmmM2L) transferPass();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
237

238
        if(operationsToProceed & FFmmL2L) downardPass();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
239

BRAMAS Berenger's avatar
BRAMAS Berenger committed
240 241
        if( operationsToProceed & FFmmP2P ) directPass();

242
        if( operationsToProceed & FFmmL2P ) mergePass();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
243 244 245 246 247 248 249 250

        starpu_task_wait_for_all();
        starpu_pause();
    }

protected:
    void initCodelet(){
        memset(&p2m_cl, 0, sizeof(p2m_cl));
251
#ifdef STARPU_USE_CPU
252
        if(originalCpuKernel->supportP2M(FSTARPU_CPU_IDX)){
253 254 255
            p2m_cl.cpu_funcs[0] = StarPUCpuWrapperClass::bottomPassCallback;
            p2m_cl.where |= STARPU_CPU;
        }
256
#endif
257
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
258
        if(originalCpuKernel->supportP2M(FSTARPU_CUDA_IDX)){
259 260 261 262
            p2m_cl.cuda_funcs[0] = StarPUCudaWrapperClass::bottomPassCallback;
            p2m_cl.where |= STARPU_CUDA;
        }
#endif
263
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
264
        if(originalCpuKernel->supportP2M(FSTARPU_OPENCL_IDX)){
265 266 267
            p2m_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::bottomPassCallback;
            p2m_cl.where |= STARPU_OPENCL;
        }
268
#endif
269 270 271 272
        p2m_cl.nbuffers = 3;
        p2m_cl.modes[0] = STARPU_R;
        p2m_cl.modes[1] = STARPU_RW;
        p2m_cl.modes[2] = STARPU_R;
273
        p2m_cl.name = "p2m_cl";
BRAMAS Berenger's avatar
BRAMAS Berenger committed
274 275 276 277

        memset(m2m_cl, 0, sizeof(m2m_cl[0])*9);
        memset(l2l_cl, 0, sizeof(l2l_cl[0])*9);
        for(int idx = 0 ; idx < 9 ; ++idx){
278
#ifdef STARPU_USE_CPU
279
            if(originalCpuKernel->supportM2M(FSTARPU_CPU_IDX)){
280 281 282
                m2m_cl[idx].cpu_funcs[0] = StarPUCpuWrapperClass::upwardPassCallback;
                m2m_cl[idx].where |= STARPU_CPU;
            }
283
#endif
284
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
285
            if(originalCpuKernel->supportM2M(FSTARPU_CUDA_IDX)){
286 287 288 289
                m2m_cl[idx].cuda_funcs[0] = StarPUCudaWrapperClass::upwardPassCallback;
                m2m_cl[idx].where |= STARPU_CUDA;
            }
#endif
290
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
291
            if(originalCpuKernel->supportM2M(FSTARPU_OPENCL_IDX)){
292 293 294
                m2m_cl[idx].opencl_funcs[0] = StarPUOpenClWrapperClass::upwardPassCallback;
                m2m_cl[idx].where |= STARPU_OPENCL;
            }
295
#endif
296 297 298 299
            m2m_cl[idx].nbuffers = (idx+2)*2;
            m2m_cl[idx].dyn_modes = (starpu_data_access_mode*)malloc(m2m_cl[idx].nbuffers*sizeof(starpu_data_access_mode));
            m2m_cl[idx].dyn_modes[0] = STARPU_R;
            m2m_cl[idx].dyn_modes[1] = STARPU_RW;
300
            m2m_cl[idx].name = "m2m_cl";
BRAMAS Berenger's avatar
BRAMAS Berenger committed
301

302
#ifdef STARPU_USE_CPU
303
            if(originalCpuKernel->supportL2L(FSTARPU_CPU_IDX)){
304 305 306
                l2l_cl[idx].cpu_funcs[0] = StarPUCpuWrapperClass::downardPassCallback;
                l2l_cl[idx].where |= STARPU_CPU;
            }
307
#endif
308
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
309
            if(originalCpuKernel->supportL2L(FSTARPU_CUDA_IDX)){
310 311 312 313
                l2l_cl[idx].cuda_funcs[0] = StarPUCudaWrapperClass::downardPassCallback;
                l2l_cl[idx].where |= STARPU_CUDA;
            }
#endif
314
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
315
            if(originalCpuKernel->supportL2L(FSTARPU_OPENCL_IDX)){
316 317 318
                l2l_cl[idx].opencl_funcs[0] = StarPUOpenClWrapperClass::downardPassCallback;
                l2l_cl[idx].where |= STARPU_OPENCL;
            }
319
#endif
320 321
            l2l_cl[idx].nbuffers = (idx+2)*2;
            l2l_cl[idx].dyn_modes = (starpu_data_access_mode*)malloc(l2l_cl[idx].nbuffers*sizeof(starpu_data_access_mode));
BRAMAS Berenger's avatar
BRAMAS Berenger committed
322
            l2l_cl[idx].dyn_modes[0] = STARPU_R;
323
            l2l_cl[idx].dyn_modes[1] = STARPU_R;
324
            l2l_cl[idx].name = "l2l_cl";
BRAMAS Berenger's avatar
BRAMAS Berenger committed
325 326

            for(int idxBuffer = 0 ; idxBuffer <= idx ; ++idxBuffer){
327 328 329 330
                m2m_cl[idx].dyn_modes[(idxBuffer*2)+2] = STARPU_R;
                m2m_cl[idx].dyn_modes[(idxBuffer*2)+3] = STARPU_R;
                l2l_cl[idx].dyn_modes[(idxBuffer*2)+2] = STARPU_R;
                l2l_cl[idx].dyn_modes[(idxBuffer*2)+3] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
331 332 333 334
            }
        }

        memset(&l2p_cl, 0, sizeof(l2p_cl));
335
#ifdef STARPU_USE_CPU
336
        if(originalCpuKernel->supportL2P(FSTARPU_CPU_IDX)){
337 338 339
            l2p_cl.cpu_funcs[0] = StarPUCpuWrapperClass::mergePassCallback;
            l2p_cl.where |= STARPU_CPU;
        }
340
#endif
341
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
342
        if(originalCpuKernel->supportL2P(FSTARPU_CUDA_IDX)){
343 344 345 346
            l2p_cl.cuda_funcs[0] = StarPUCudaWrapperClass::mergePassCallback;
            l2p_cl.where |= STARPU_CUDA;
        }
#endif
347
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
348
        if(originalCpuKernel->supportL2P(FSTARPU_OPENCL_IDX)){
349 350 351
            l2p_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::mergePassCallback;
            l2p_cl.where |= STARPU_OPENCL;
        }
352
#endif
353
        l2p_cl.nbuffers = 4;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
354
        l2p_cl.modes[0] = STARPU_R;
355 356 357
        l2p_cl.modes[1] = STARPU_R;
        l2p_cl.modes[2] = STARPU_R;
        l2p_cl.modes[3] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
358
        l2p_cl.name = "l2p_cl";
BRAMAS Berenger's avatar
BRAMAS Berenger committed
359 360

        memset(&p2p_cl_in, 0, sizeof(p2p_cl_in));
361
#ifdef STARPU_USE_CPU
362
        if(originalCpuKernel->supportP2P(FSTARPU_CPU_IDX)){
363 364 365
            p2p_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::directInPassCallback;
            p2p_cl_in.where |= STARPU_CPU;
        }
366
#endif
367
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
368
        if(originalCpuKernel->supportP2P(FSTARPU_CUDA_IDX)){
369 370 371 372
            p2p_cl_in.cuda_funcs[0] = StarPUCudaWrapperClass::directInPassCallback;
            p2p_cl_in.where |= STARPU_CUDA;
        }
#endif
373
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
374
        if(originalCpuKernel->supportP2P(FSTARPU_OPENCL_IDX)){
375 376 377
            p2p_cl_in.opencl_funcs[0] = StarPUOpenClWrapperClass::directInPassCallback;
            p2p_cl_in.where |= STARPU_OPENCL;
        }
378
#endif
379 380 381
        p2p_cl_in.nbuffers = 2;
        p2p_cl_in.modes[0] = STARPU_R;
        p2p_cl_in.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
382
        p2p_cl_in.name = "p2p_cl_in";
BRAMAS Berenger's avatar
BRAMAS Berenger committed
383
        memset(&p2p_cl_inout, 0, sizeof(p2p_cl_inout));
384
#ifdef STARPU_USE_CPU
385
        if(originalCpuKernel->supportP2PExtern(FSTARPU_CPU_IDX)){
386 387 388
            p2p_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::directInoutPassCallback;
            p2p_cl_inout.where |= STARPU_CPU;
        }
389
#endif
390
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
391
        if(originalCpuKernel->supportP2PExtern(FSTARPU_CUDA_IDX)){
392 393 394 395
            p2p_cl_inout.cuda_funcs[0] = StarPUCudaWrapperClass::directInoutPassCallback;
            p2p_cl_inout.where |= STARPU_CUDA;
        }
#endif
396
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
397
        if(originalCpuKernel->supportP2PExtern(FSTARPU_OPENCL_IDX)){
398 399 400
            p2p_cl_inout.opencl_funcs[0] = StarPUOpenClWrapperClass::directInoutPassCallback;
            p2p_cl_inout.where |= STARPU_OPENCL;
        }
401
#endif
402 403
        p2p_cl_inout.nbuffers = 4;
        p2p_cl_inout.modes[0] = STARPU_R;
404
        p2p_cl_inout.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
405 406
        p2p_cl_inout.modes[2] = STARPU_R;
        p2p_cl_inout.modes[3] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
407
        p2p_cl_inout.name = "p2p_cl_inout";
BRAMAS Berenger's avatar
BRAMAS Berenger committed
408 409

        memset(&m2l_cl_in, 0, sizeof(m2l_cl_in));
410
#ifdef STARPU_USE_CPU
411
        if(originalCpuKernel->supportM2L(FSTARPU_CPU_IDX)){
412 413 414
            m2l_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::transferInPassCallback;
            m2l_cl_in.where |= STARPU_CPU;
        }
415
#endif
416
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
417
        if(originalCpuKernel->supportM2L(FSTARPU_CUDA_IDX)){
418 419 420 421
            m2l_cl_in.cuda_funcs[0] = StarPUCudaWrapperClass::transferInPassCallback;
            m2l_cl_in.where |= STARPU_CUDA;
        }
#endif
422
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
423
        if(originalCpuKernel->supportM2L(FSTARPU_OPENCL_IDX)){
424 425 426
            m2l_cl_in.opencl_funcs[0] = StarPUOpenClWrapperClass::transferInPassCallback;
            m2l_cl_in.where |= STARPU_OPENCL;
        }
427
#endif
428 429
        m2l_cl_in.nbuffers = 3;
        m2l_cl_in.modes[0] = STARPU_R;
430
        m2l_cl_in.modes[1] = STARPU_R;
431
        m2l_cl_in.modes[2] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
432
        m2l_cl_in.name = "m2l_cl_in";
BRAMAS Berenger's avatar
BRAMAS Berenger committed
433
        memset(&m2l_cl_inout, 0, sizeof(m2l_cl_inout));
434
#ifdef STARPU_USE_CPU
435
        if(originalCpuKernel->supportM2LExtern(FSTARPU_CPU_IDX)){
436 437 438
            m2l_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::transferInoutPassCallback;
            m2l_cl_inout.where |= STARPU_CPU;
        }
439
#endif
440
#ifdef SCALFMM_ENABLE_CUDA_KERNEL
441
        if(originalCpuKernel->supportM2LExtern(FSTARPU_CUDA_IDX)){
442 443 444 445
            m2l_cl_inout.cuda_funcs[0] = StarPUCudaWrapperClass::transferInoutPassCallback;
            m2l_cl_inout.where |= STARPU_CUDA;
        }
#endif
446
#ifdef SCALFMM_ENABLE_OPENCL_KERNEL
447
        if(originalCpuKernel->supportM2LExtern(FSTARPU_OPENCL_IDX)){
448 449 450
            m2l_cl_inout.opencl_funcs[0] = StarPUOpenClWrapperClass::transferInoutPassCallback;
            m2l_cl_inout.where |= STARPU_OPENCL;
        }
451
#endif
452 453 454 455
        m2l_cl_inout.nbuffers = 6;
        m2l_cl_inout.modes[0] = STARPU_R;
        m2l_cl_inout.modes[1] = STARPU_R;
        m2l_cl_inout.modes[2] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
456
        m2l_cl_inout.modes[3] = STARPU_R;
457 458
        m2l_cl_inout.modes[4] = STARPU_R;
        m2l_cl_inout.modes[5] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
459
        m2l_cl_inout.name = "m2l_cl_inout";
BRAMAS Berenger's avatar
BRAMAS Berenger committed
460 461 462 463 464
    }

    /** dealloc in a starpu way all the defined handles */
    void cleanHandle(){
        for(int idxLevel = 0 ; idxLevel < tree->getHeight() ; ++idxLevel){
465 466 467 468
            for(int idxHandle = 0 ; idxHandle < int(cellHandles[idxLevel].size()) ; ++idxHandle){
                starpu_data_unregister(cellHandles[idxLevel][idxHandle].symb);
                starpu_data_unregister(cellHandles[idxLevel][idxHandle].up);
                starpu_data_unregister(cellHandles[idxLevel][idxHandle].down);
469
            }
470
            cellHandles[idxLevel].clear();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
471 472
        }
        {
473 474 475
            for(int idxHandle = 0 ; idxHandle < int(particleHandles.size()) ; ++idxHandle){
                starpu_data_unregister(particleHandles[idxHandle].symb);
                starpu_data_unregister(particleHandles[idxHandle].down);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
476
            }
477
            particleHandles.clear();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
478 479 480 481 482 483 484 485 486 487
        }
    }

    /** Reset the handles array and create new ones to define
     * in a starpu way each block of data
     */
    void buildHandles(){
        cleanHandle();

        for(int idxLevel = 2 ; idxLevel < tree->getHeight() ; ++idxLevel){
488
            cellHandles[idxLevel].resize(tree->getNbCellGroupAtLevel(idxLevel));
BRAMAS Berenger's avatar
BRAMAS Berenger committed
489 490 491

            for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
                const CellContainerClass* currentCells = tree->getCellGroup(idxLevel, idxGroup);
492
                starpu_variable_data_register(&cellHandles[idxLevel][idxGroup].symb, 0,
493
                                              (uintptr_t)currentCells->getRawBuffer(), currentCells->getBufferSizeInByte());
494 495 496 497
                starpu_variable_data_register(&cellHandles[idxLevel][idxGroup].up, 0,
                                              (uintptr_t)currentCells->getRawMultipoleBuffer(), currentCells->getMultipoleBufferSizeInByte());
                starpu_variable_data_register(&cellHandles[idxLevel][idxGroup].down, 0,
                                              (uintptr_t)currentCells->getRawLocalBuffer(), currentCells->getLocalBufferSizeInByte());
BRAMAS Berenger's avatar
BRAMAS Berenger committed
498 499 500
            }
        }
        {
501
            particleHandles.resize(tree->getNbParticleGroup());
BRAMAS Berenger's avatar
BRAMAS Berenger committed
502 503
            for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
                ParticleGroupClass* containers = tree->getParticleGroup(idxGroup);
504
                starpu_variable_data_register(&particleHandles[idxGroup].symb, 0,
505
                                              (uintptr_t)containers->getRawBuffer(), containers->getBufferSizeInByte());
506 507
                starpu_variable_data_register(&particleHandles[idxGroup].down, 0,
                                              (uintptr_t)containers->getRawAttributesBuffer(), containers->getAttributesBufferSizeInByte());
BRAMAS Berenger's avatar
BRAMAS Berenger committed
508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543
            }
        }
    }

    /**
     * This function is creating the interactions vector between blocks.
     * It fills externalInteractionsAllLevel and externalInteractionsLeafLevel.
     * Warning, the omp task for now are using the class attributes!
     *
     */
    void buildExternalInteractionVecs(){
        FLOG( FTic timer; FTic leafTimer; FTic cellTimer; );
        // Reset interactions
        externalInteractionsAllLevel.clear();
        externalInteractionsLeafLevel.clear();
        // One per level + leaf level
        externalInteractionsAllLevel.resize(tree->getHeight());

        // First leaf level
        {
            // We create one big vector per block
            externalInteractionsLeafLevel.resize(tree->getNbParticleGroup());

            for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
                // Create the vector
                ParticleGroupClass* containers = tree->getParticleGroup(idxGroup);

                std::vector<BlockInteractions<ParticleGroupClass>>* externalInteractions = &externalInteractionsLeafLevel[idxGroup];

                #pragma omp task default(none) firstprivate(idxGroup, containers, externalInteractions)
                { // Can be a task(inout:iterCells)
                    std::vector<OutOfBlockInteraction> outsideInteractions;
                    const MortonIndex blockStartIdx = containers->getStartingIndex();
                    const MortonIndex blockEndIdx   = containers->getEndingIndex();

                    for(MortonIndex mindex = blockStartIdx ; mindex < blockEndIdx ; ++mindex){
544
                        if(containers->exists(mindex)){
BRAMAS Berenger's avatar
BRAMAS Berenger committed
545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606
                            MortonIndex interactionsIndexes[26];
                            int interactionsPosition[26];
                            FTreeCoordinate coord(mindex, tree->getHeight()-1);
                            int counter = coord.getNeighborsIndexes(tree->getHeight(),interactionsIndexes,interactionsPosition);

                            for(int idxInter = 0 ; idxInter < counter ; ++idxInter){
                                if( blockStartIdx <= interactionsIndexes[idxInter] && interactionsIndexes[idxInter] < blockEndIdx ){
                                    // Inside block interaction, do nothing
                                }
                                else if(interactionsIndexes[idxInter] < mindex){
                                    OutOfBlockInteraction property;
                                    property.insideIndex = mindex;
                                    property.outIndex    = interactionsIndexes[idxInter];
                                    property.outPosition = interactionsPosition[idxInter];
                                    outsideInteractions.push_back(property);
                                }
                            }
                        }
                    }

                    // Sort to match external order
                    FQuickSort<OutOfBlockInteraction, int>::QsSequential(outsideInteractions.data(),int(outsideInteractions.size()));

                    int currentOutInteraction = 0;
                    for(int idxLeftGroup = 0 ; idxLeftGroup < idxGroup && currentOutInteraction < int(outsideInteractions.size()) ; ++idxLeftGroup){
                        ParticleGroupClass* leftContainers = tree->getParticleGroup(idxLeftGroup);
                        const MortonIndex blockStartIdx    = leftContainers->getStartingIndex();
                        const MortonIndex blockEndIdx      = leftContainers->getEndingIndex();

                        while(currentOutInteraction < int(outsideInteractions.size()) && outsideInteractions[currentOutInteraction].outIndex < blockStartIdx){
                            currentOutInteraction += 1;
                        }

                        int lastOutInteraction = currentOutInteraction;
                        while(lastOutInteraction < int(outsideInteractions.size()) && outsideInteractions[lastOutInteraction].outIndex < blockEndIdx){
                            lastOutInteraction += 1;
                        }

                        const int nbInteractionsBetweenBlocks = (lastOutInteraction-currentOutInteraction);
                        if(nbInteractionsBetweenBlocks){
                            externalInteractions->emplace_back();
                            BlockInteractions<ParticleGroupClass>* interactions = &externalInteractions->back();
                            interactions->otherBlock = leftContainers;
                            interactions->otherBlockId = idxLeftGroup;
                            interactions->interactions.resize(nbInteractionsBetweenBlocks);
                            std::copy(outsideInteractions.begin() + currentOutInteraction,
                                      outsideInteractions.begin() + lastOutInteraction,
                                      interactions->interactions.begin());
                        }

                        currentOutInteraction = lastOutInteraction;
                    }
                }
            }
        }
        FLOG( leafTimer.tac(); );
        FLOG( cellTimer.tic(); );
        {
            for(int idxLevel = tree->getHeight()-1 ; idxLevel >= 2 ; --idxLevel){
                externalInteractionsAllLevel[idxLevel].resize(tree->getNbCellGroupAtLevel(idxLevel));

                for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
607
                    CellContainerClass* currentCells = tree->getCellGroup(idxLevel, idxGroup);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
608 609 610 611 612 613 614 615 616 617

                    std::vector<BlockInteractions<CellContainerClass>>* externalInteractions = &externalInteractionsAllLevel[idxLevel][idxGroup];

                    #pragma omp task default(none) firstprivate(idxGroup, currentCells, idxLevel, externalInteractions)
                    {
                        std::vector<OutOfBlockInteraction> outsideInteractions;
                        const MortonIndex blockStartIdx = currentCells->getStartingIndex();
                        const MortonIndex blockEndIdx   = currentCells->getEndingIndex();

                        for(MortonIndex mindex = blockStartIdx ; mindex < blockEndIdx ; ++mindex){
618 619 620
                            if(currentCells->exists(mindex)){
                                typename CellContainerClass::CompleteCellClass cell = currentCells->getCompleteCell(mindex);
                                FAssertLF(cell.getMortonIndex() == mindex);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
621 622
                                MortonIndex interactionsIndexes[189];
                                int interactionsPosition[189];
623
                                const FTreeCoordinate coord(cell.getCoordinate());
BRAMAS Berenger's avatar
BRAMAS Berenger committed
624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695
                                int counter = coord.getInteractionNeighbors(idxLevel,interactionsIndexes,interactionsPosition);

                                for(int idxInter = 0 ; idxInter < counter ; ++idxInter){
                                    if( blockStartIdx <= interactionsIndexes[idxInter] && interactionsIndexes[idxInter] < blockEndIdx ){
                                        // Nothing to do
                                    }
                                    else if(interactionsIndexes[idxInter] < mindex){
                                        OutOfBlockInteraction property;
                                        property.insideIndex = mindex;
                                        property.outIndex    = interactionsIndexes[idxInter];
                                        property.outPosition = interactionsPosition[idxInter];
                                        outsideInteractions.push_back(property);
                                    }
                                }
                            }
                        }

                        // Manage outofblock interaction
                        FQuickSort<OutOfBlockInteraction, int>::QsSequential(outsideInteractions.data(),int(outsideInteractions.size()));

                        int currentOutInteraction = 0;
                        for(int idxLeftGroup = 0 ; idxLeftGroup < idxGroup && currentOutInteraction < int(outsideInteractions.size()) ; ++idxLeftGroup){
                            CellContainerClass* leftCells   = tree->getCellGroup(idxLevel, idxLeftGroup);
                            const MortonIndex blockStartIdx = leftCells->getStartingIndex();
                            const MortonIndex blockEndIdx   = leftCells->getEndingIndex();

                            while(currentOutInteraction < int(outsideInteractions.size()) && outsideInteractions[currentOutInteraction].outIndex < blockStartIdx){
                                currentOutInteraction += 1;
                            }

                            int lastOutInteraction = currentOutInteraction;
                            while(lastOutInteraction < int(outsideInteractions.size()) && outsideInteractions[lastOutInteraction].outIndex < blockEndIdx){
                                lastOutInteraction += 1;
                            }

                            // Create interactions
                            const int nbInteractionsBetweenBlocks = (lastOutInteraction-currentOutInteraction);
                            if(nbInteractionsBetweenBlocks){
                                externalInteractions->emplace_back();
                                BlockInteractions<CellContainerClass>* interactions = &externalInteractions->back();
                                interactions->otherBlock = leftCells;
                                interactions->otherBlockId = idxLeftGroup;
                                interactions->interactions.resize(nbInteractionsBetweenBlocks);
                                std::copy(outsideInteractions.begin() + currentOutInteraction,
                                          outsideInteractions.begin() + lastOutInteraction,
                                          interactions->interactions.begin());
                            }

                            currentOutInteraction = lastOutInteraction;
                        }
                    }
                }
            }
        }
        FLOG( cellTimer.tac(); );

        #pragma omp taskwait

        FLOG( FLog::Controller << "\t\t Prepare in " << timer.tacAndElapsed() << "s\n" );
        FLOG( FLog::Controller << "\t\t\t Prepare at leaf level in   " << leafTimer.elapsed() << "s\n" );
        FLOG( FLog::Controller << "\t\t\t Prepare at other levels in " << cellTimer.elapsed() << "s\n" );
    }

    /////////////////////////////////////////////////////////////////////////////////////
    /// Bottom Pass
    /////////////////////////////////////////////////////////////////////////////////////

    void bottomPass(){
        FLOG( FTic timer; );

        for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
            starpu_insert_task(&p2m_cl,
696
                    STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
697 698 699
                    STARPU_R, cellHandles[tree->getHeight()-1][idxGroup].symb,
                    STARPU_RW, cellHandles[tree->getHeight()-1][idxGroup].up,
                    STARPU_R, particleHandles[idxGroup].symb,
BRAMAS Berenger's avatar
BRAMAS Berenger committed
700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718
                    0);
        }

        FLOG( FLog::Controller << "\t\t bottomPass in " << timer.tacAndElapsed() << "s\n" );
    }

    /////////////////////////////////////////////////////////////////////////////////////
    /// Upward Pass
    /////////////////////////////////////////////////////////////////////////////////////

    void upwardPass(){
        FLOG( FTic timer; );
        for(int idxLevel = tree->getHeight()-2 ; idxLevel >= 2 ; --idxLevel){
            int idxSubGroup = 0;

            for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
                CellContainerClass*const currentCells = tree->getCellGroup(idxLevel, idxGroup);

                struct starpu_task* const task = starpu_task_create();
719 720 721
                task->dyn_handles = (starpu_data_handle_t*)malloc(sizeof(starpu_data_handle_t)*20);
                task->dyn_handles[0] = cellHandles[idxLevel][idxGroup].symb;
                task->dyn_handles[1] = cellHandles[idxLevel][idxGroup].up;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
722 723 724 725 726 727 728

                // Skip current group if needed
                if( tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex() <= (currentCells->getStartingIndex()<<3) ){
                    ++idxSubGroup;
                    FAssertLF( idxSubGroup != tree->getNbCellGroupAtLevel(idxLevel+1) );
                    FAssertLF( (tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex()>>3) == currentCells->getStartingIndex() );
                }
BRAMAS Berenger's avatar
BRAMAS Berenger committed
729

BRAMAS Berenger's avatar
BRAMAS Berenger committed
730 731
                // Copy at max 8 groups
                int nbSubCellGroups = 0;
732 733
                task->dyn_handles[(nbSubCellGroups*2) + 2] = cellHandles[idxLevel+1][idxSubGroup].symb;
                task->dyn_handles[(nbSubCellGroups*2) + 3] = cellHandles[idxLevel+1][idxSubGroup].up;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
734
                nbSubCellGroups += 1;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
735 736

                while(tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex() <= (((currentCells->getEndingIndex()-1)<<3)+7)
737
                      && (idxSubGroup+1) != tree->getNbCellGroupAtLevel(idxLevel+1)
BRAMAS Berenger's avatar
BRAMAS Berenger committed
738
                      && tree->getCellGroup(idxLevel+1, idxSubGroup+1)->getStartingIndex() <= ((currentCells->getEndingIndex()-1)<<3)+7 ){
739
                    idxSubGroup += 1;
740 741
                    task->dyn_handles[(nbSubCellGroups*2) + 2] = cellHandles[idxLevel+1][idxSubGroup].symb;
                    task->dyn_handles[(nbSubCellGroups*2) + 3] = cellHandles[idxLevel+1][idxSubGroup].up;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
742 743 744 745 746 747 748 749 750 751
                    nbSubCellGroups += 1;
                    FAssertLF( nbSubCellGroups <= 9 );
                }

                // put the right codelet
                task->cl = &m2m_cl[nbSubCellGroups-1];
                // put args values
                char *arg_buffer;
                size_t arg_buffer_size;
                starpu_codelet_pack_args((void**)&arg_buffer, &arg_buffer_size,
752
                                         STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
BRAMAS Berenger's avatar
BRAMAS Berenger committed
753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774
                                         STARPU_VALUE, &nbSubCellGroups, sizeof(nbSubCellGroups),
                                         STARPU_VALUE, &idxLevel, sizeof(idxLevel),
                                         0);
                task->cl_arg = arg_buffer;
                task->cl_arg_size = arg_buffer_size;
                FAssertLF(starpu_task_submit(task) == 0);
            }
        }
        FLOG( FLog::Controller << "\t\t upwardPass in " << timer.tacAndElapsed() << "s\n" );
    }

    /////////////////////////////////////////////////////////////////////////////////////
    /// Transfer Pass
    /////////////////////////////////////////////////////////////////////////////////////

    void transferPass(){
        FLOG( FTic timer; );
        FLOG( FTic timerInBlock; FTic timerOutBlock; );
        for(int idxLevel = tree->getHeight()-1 ; idxLevel >= 2 ; --idxLevel){
            FLOG( timerInBlock.tic() );
            for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
                starpu_insert_task(&m2l_cl_in,
775
                        STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
BRAMAS Berenger's avatar
BRAMAS Berenger committed
776
                        STARPU_VALUE, &idxLevel, sizeof(idxLevel),
777 778 779
                                   STARPU_R, cellHandles[idxLevel][idxGroup].symb,
                                   STARPU_R, cellHandles[idxLevel][idxGroup].up,
                                   (STARPU_RW|STARPU_COMMUTE), cellHandles[idxLevel][idxGroup].down,
BRAMAS Berenger's avatar
BRAMAS Berenger committed
780 781 782 783 784 785 786 787 788 789 790
                        0);
            }
            FLOG( timerInBlock.tac() );

            FLOG( timerOutBlock.tic() );
            for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
                for(int idxInteraction = 0; idxInteraction < int(externalInteractionsAllLevel[idxLevel][idxGroup].size()) ; ++idxInteraction){
                    const int interactionid = externalInteractionsAllLevel[idxLevel][idxGroup][idxInteraction].otherBlockId;
                    const std::vector<OutOfBlockInteraction>* outsideInteractions = &externalInteractionsAllLevel[idxLevel][idxGroup][idxInteraction].interactions;

                    starpu_insert_task(&m2l_cl_inout,
791
                            STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
BRAMAS Berenger's avatar
BRAMAS Berenger committed
792
                            STARPU_VALUE, &idxLevel, sizeof(idxLevel),
793
                            STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions),
794 795 796 797 798 799
                               STARPU_R, cellHandles[idxLevel][idxGroup].symb,
                               STARPU_R, cellHandles[idxLevel][idxGroup].up,
                               (STARPU_RW|STARPU_COMMUTE), cellHandles[idxLevel][idxGroup].down,
                               STARPU_R, cellHandles[idxLevel][interactionid].symb,
                               STARPU_R, cellHandles[idxLevel][interactionid].up,
                               (STARPU_RW|STARPU_COMMUTE), cellHandles[idxLevel][interactionid].down,
BRAMAS Berenger's avatar
BRAMAS Berenger committed
800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822
                            0);
                }
            }
            FLOG( timerOutBlock.tac() );
        }
        FLOG( FLog::Controller << "\t\t transferPass in " << timer.tacAndElapsed() << "s\n" );
        FLOG( FLog::Controller << "\t\t\t inblock in  " << timerInBlock.elapsed() << "s\n" );
        FLOG( FLog::Controller << "\t\t\t outblock in " << timerOutBlock.elapsed() << "s\n" );
    }

    /////////////////////////////////////////////////////////////////////////////////////
    /// Downard Pass
    /////////////////////////////////////////////////////////////////////////////////////

    void downardPass(){
        FLOG( FTic timer; );
        for(int idxLevel = 2 ; idxLevel <= tree->getHeight()-2 ; ++idxLevel){
            int idxSubGroup = 0;

            for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
                CellContainerClass*const currentCells = tree->getCellGroup(idxLevel, idxGroup);

                struct starpu_task* const task = starpu_task_create();
823 824 825
                task->dyn_handles = (starpu_data_handle_t*)malloc(sizeof(starpu_data_handle_t)*20);
                task->dyn_handles[0] = cellHandles[idxLevel][idxGroup].symb;
                task->dyn_handles[1] = cellHandles[idxLevel][idxGroup].down;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
826 827 828 829 830 831 832 833 834

                // Skip current group if needed
                if( tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex() <= (currentCells->getStartingIndex()<<3) ){
                    ++idxSubGroup;
                    FAssertLF( idxSubGroup != tree->getNbCellGroupAtLevel(idxLevel+1) );
                    FAssertLF( (tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex()>>3) == currentCells->getStartingIndex() );
                }
                // Copy at max 8 groups
                int nbSubCellGroups = 0;
835 836
                task->dyn_handles[(nbSubCellGroups*2) + 2] = cellHandles[idxLevel+1][idxSubGroup].symb;
                task->dyn_handles[(nbSubCellGroups*2) + 3] = cellHandles[idxLevel+1][idxSubGroup].down;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
837
                nbSubCellGroups += 1;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
838
                while(tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex() <= (((currentCells->getEndingIndex()-1)<<3)+7)
839
                      && (idxSubGroup+1) != tree->getNbCellGroupAtLevel(idxLevel+1)
BRAMAS Berenger's avatar
BRAMAS Berenger committed
840
                      && tree->getCellGroup(idxLevel+1, idxSubGroup+1)->getStartingIndex() <= ((currentCells->getEndingIndex()-1)<<3)+7 ){
841
                    idxSubGroup += 1;
842 843
                    task->dyn_handles[(nbSubCellGroups*2) + 2] = cellHandles[idxLevel+1][idxSubGroup].symb;
                    task->dyn_handles[(nbSubCellGroups*2) + 3] = cellHandles[idxLevel+1][idxSubGroup].down;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
844 845 846 847 848 849 850 851 852 853
                    nbSubCellGroups += 1;
                    FAssertLF( nbSubCellGroups <= 9 );
                }

                // put the right codelet
                task->cl = &l2l_cl[nbSubCellGroups-1];
                // put args values
                char *arg_buffer;
                size_t arg_buffer_size;
                starpu_codelet_pack_args((void**)&arg_buffer, &arg_buffer_size,
854
                                         STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
BRAMAS Berenger's avatar
BRAMAS Berenger committed
855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876
                                         STARPU_VALUE, &nbSubCellGroups, sizeof(nbSubCellGroups),
                                         STARPU_VALUE, &idxLevel, sizeof(idxLevel),
                                         0);
                task->cl_arg = arg_buffer;
                task->cl_arg_size = arg_buffer_size;
                FAssertLF(starpu_task_submit(task) == 0);
            }
        }
        FLOG( FLog::Controller << "\t\t downardPass in " << timer.tacAndElapsed() << "s\n" );
    }

    /////////////////////////////////////////////////////////////////////////////////////
    /// Direct Pass
    /////////////////////////////////////////////////////////////////////////////////////

    void directPass(){
        FLOG( FTic timer; );
        FLOG( FTic timerInBlock; FTic timerOutBlock; );

        FLOG( timerInBlock.tic() );
        for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
            starpu_insert_task(&p2p_cl_in,
877
                    STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
878 879
                               STARPU_R, particleHandles[idxGroup].symb,
                               (STARPU_RW|STARPU_COMMUTE), particleHandles[idxGroup].down,
BRAMAS Berenger's avatar
BRAMAS Berenger committed
880 881 882 883 884
                    0);
        }
        FLOG( timerInBlock.tac() );
        FLOG( timerOutBlock.tic() );
        for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
885
            for(int idxInteraction = 0; idxInteraction < int(externalInteractionsLeafLevel[idxGroup].size()) ; ++idxInteraction){
BRAMAS Berenger's avatar
BRAMAS Berenger committed
886 887
                const int interactionid = externalInteractionsLeafLevel[idxGroup][idxInteraction].otherBlockId;
                const std::vector<OutOfBlockInteraction>* outsideInteractions = &externalInteractionsLeafLevel[idxGroup][idxInteraction].interactions;
888
                starpu_insert_task(&p2p_cl_inout,
889
                        STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
890
                        STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions),
891 892 893 894
                        STARPU_R, particleHandles[idxGroup].symb,
                                   (STARPU_RW|STARPU_COMMUTE), particleHandles[idxGroup].down,
                        STARPU_R, particleHandles[interactionid].symb,
                                   (STARPU_RW|STARPU_COMMUTE), particleHandles[interactionid].down,
BRAMAS Berenger's avatar
BRAMAS Berenger committed
895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912
                        0);
            }
        }
        FLOG( timerOutBlock.tac() );

        FLOG( FLog::Controller << "\t\t directPass in " << timer.tacAndElapsed() << "s\n" );
        FLOG( FLog::Controller << "\t\t\t inblock  in " << timerInBlock.elapsed() << "s\n" );
        FLOG( FLog::Controller << "\t\t\t outblock in " << timerOutBlock.elapsed() << "s\n" );
    }
    /////////////////////////////////////////////////////////////////////////////////////
    /// Merge Pass
    /////////////////////////////////////////////////////////////////////////////////////

    void mergePass(){
        FLOG( FTic timer; );

        for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
            starpu_insert_task(&l2p_cl,
913
                    STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
914 915 916 917
                    STARPU_R, cellHandles[tree->getHeight()-1][idxGroup].symb,
                    STARPU_R, cellHandles[tree->getHeight()-1][idxGroup].down,
                    STARPU_R, particleHandles[idxGroup].symb,
                    (STARPU_RW|STARPU_COMMUTE), particleHandles[idxGroup].down,
BRAMAS Berenger's avatar
BRAMAS Berenger committed
918 919 920 921 922 923 924 925
                    0);
        }

        FLOG( FLog::Controller << "\t\t L2P in " << timer.tacAndElapsed() << "s\n" );
    }
};

#endif // FGROUPTASKSTARPUALGORITHM_HPP