FStarPUCpuWrapper.hpp 27.5 KB
Newer Older
1 2

// @SCALFMM_PRIVATE
3 4 5 6
#ifndef FSTARPUCPUWRAPPER_HPP
#define FSTARPUCPUWRAPPER_HPP


7 8 9 10 11 12 13
#include "../../Utils/FGlobal.hpp"
#include "../../Core/FCoreCommon.hpp"
#include "../../Utils/FQuickSort.hpp"
#include "../../Containers/FTreeCoordinate.hpp"
#include "../../Utils/FLog.hpp"
#include "../../Utils/FTic.hpp"
#include "../../Utils/FAssert.hpp"
14

15
#include "../Core/FOutOfBlockInteraction.hpp"
16

17
#ifdef SCALFMM_USE_MPI
18
#include "../../Utils/FMpi.hpp"
19 20 21 22 23 24 25
#endif

#include <vector>
#include <memory>

#include <omp.h>

26
//extern "C"{
27
#include <starpu.h>
28
//}
29 30

#ifdef STARPU_USE_MPI
31
//extern "C"{
32
#include <starpu_mpi.h>
33
//}
34 35
#endif

36
#include "FStarPUUtils.hpp"
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63

template <class CellContainerClass, class CellClass, class KernelClass,
          class ParticleGroupClass, class ParticleContainerClass>
class FStarPUCpuWrapper {
protected:
    typedef FStarPUCpuWrapper<CellContainerClass, CellClass, KernelClass, ParticleGroupClass, ParticleContainerClass> ThisClass;

    /**
     * Plain aggregate tying a list of out-of-block interactions to the block
     * they point to. It is only declared here; no method visible in this
     * class reads or writes it.
     */
    template <class OtherBlockClass>
    struct BlockInteractions{
        // The block on the other side of the interactions (not owned here as far as this file shows)
        OtherBlockClass* otherBlock;
        // Identifier of that other block
        int otherBlockId;
        // Interactions recorded against that other block
        std::vector<OutOfBlockInteraction> interactions;
    };

    const int treeHeight;
    KernelClass* kernels[STARPU_MAXCPUS];        //< The kernels

public:
    /**
     * Build a wrapper that has no kernel attached to any worker yet.
     * Kernels are attached later, one worker at a time, with initKernel().
     * @param inTreeHeight value stored in treeHeight (the leaf level is treeHeight-1 in the direct passes)
     */
    FStarPUCpuWrapper(const int inTreeHeight): treeHeight(inTreeHeight), kernels(){
        // kernels() value-initializes every slot to nullptr; this replaces the
        // former memset, which relied on <cstring> being pulled in transitively.
    }

    void initKernel(const int workerId, KernelClass* originalKernel){
        FAssertLF(kernels[workerId] == nullptr);
        kernels[workerId] = new KernelClass(*originalKernel);
    }

BRAMAS Berenger's avatar
BRAMAS Berenger committed
64 65 66 67 68
    void releaseKernel(const int workerId){
        delete kernels[workerId];
        kernels[workerId] = nullptr;
    }

69 70
    /**
     * Nothing is freed here: the destructor only checks that every kernel slot
     * has already been emptied (see releaseKernel()).
     */
    ~FStarPUCpuWrapper(){
        for(const KernelClass* remainingKernel : kernels){
            FAssertLF(remainingKernel == nullptr);
        }
    }

    static void bottomPassCallback(void *buffers[], void *cl_arg){
        CellContainerClass leafCells((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
77 78 79 80 81 82
                            STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
                            (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]),
                            nullptr);
        ParticleGroupClass containers((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[2]),
                            STARPU_VARIABLE_GET_ELEMSIZE(buffers[2]),
                            nullptr);
83

84
        FStarPUPtrInterface* worker = nullptr;
85 86
        int intervalSize;
        starpu_codelet_unpack_args(cl_arg, &worker, &intervalSize);
87
        worker->get<ThisClass>(FSTARPU_CPU_IDX)->bottomPassPerform(&leafCells, &containers);
88 89 90
    }

    /**
     * Apply P2M on every leaf of a block with the kernel of the calling StarPU worker.
     * @param leafCells  cell group of the leaf level
     * @param containers particle group; must hold as many leaves as leafCells holds cells,
     *                   with matching Morton indexes at each position
     */
    void bottomPassPerform(CellContainerClass* leafCells, ParticleGroupClass* containers){
        FAssertLF(leafCells->getNumberOfCellsInBlock() == containers->getNumberOfLeavesInBlock());
        KernelClass*const workerKernel = kernels[starpu_worker_get_id()];

        for(int idxLeaf = 0 ; idxLeaf < leafCells->getNumberOfCellsInBlock() ; ++idxLeaf){
            CellClass upCell = leafCells->getUpCell(idxLeaf);
            ParticleContainerClass leafParticles = containers->template getLeaf<ParticleContainerClass>(idxLeaf);
            // Cells and leaves are expected to be stored in the same order
            FAssertLF(leafCells->getCellMortonIndex(idxLeaf) == containers->getLeafMortonIndex(idxLeaf));
            workerKernel->P2M(&upCell, &leafParticles);
        }
    }

    /////////////////////////////////////////////////////////////////////////////////////
    /// Upward Pass
    /////////////////////////////////////////////////////////////////////////////////////

    static void upwardPassCallback(void *buffers[], void *cl_arg){
        CellContainerClass currentCells((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
108 109 110
                                        STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
                                        (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]),
                                        nullptr);
111

112
        FStarPUPtrInterface* worker = nullptr;
113
        int idxLevel = 0;
114
        int intervalSize;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
115
        starpu_codelet_unpack_args(cl_arg, &worker, &idxLevel, &intervalSize);
116

BRAMAS Berenger's avatar
BRAMAS Berenger committed
117 118 119 120 121
        CellContainerClass subCellGroup(
                        (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[2]),
                        STARPU_VARIABLE_GET_ELEMSIZE(buffers[2]),
                        (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[3]),
                        nullptr);
122

BRAMAS Berenger's avatar
BRAMAS Berenger committed
123
        worker->get<ThisClass>(FSTARPU_CPU_IDX)->upwardPassPerform(&currentCells, &subCellGroup, idxLevel);
124 125 126
    }

    void upwardPassPerform(CellContainerClass*const currentCells,
BRAMAS Berenger's avatar
BRAMAS Berenger committed
127 128
                           CellContainerClass* subCellGroup,
                           const int idxLevel){
129
        KernelClass*const kernel = kernels[starpu_worker_get_id()];
BRAMAS Berenger's avatar
BRAMAS Berenger committed
130 131 132 133 134 135 136 137

        const MortonIndex firstParent = FMath::Max(currentCells->getStartingIndex(), subCellGroup->getStartingIndex()>>3);
        const MortonIndex lastParent = FMath::Min(currentCells->getEndingIndex()-1, (subCellGroup->getEndingIndex()-1)>>3);

        int idxParentCell = currentCells->getCellIndex(firstParent);
        FAssertLF(idxParentCell != -1);

        int idxChildCell = subCellGroup->getFistChildIdx(firstParent);
138 139 140
        FAssertLF(idxChildCell != -1);
        CellClass childData[8];

BRAMAS Berenger's avatar
BRAMAS Berenger committed
141 142 143 144
        while(true){
            CellClass cell = currentCells->getUpCell(idxParentCell);
            FAssertLF(cell.getMortonIndex() == currentCells->getCellMortonIndex(idxParentCell));
            const CellClass* child[8] = {nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr};
145

BRAMAS Berenger's avatar
BRAMAS Berenger committed
146
            FAssertLF(cell.getMortonIndex() == (subCellGroup->getCellMortonIndex(idxChildCell)>>3));
147

BRAMAS Berenger's avatar
BRAMAS Berenger committed
148 149
            do{
                const int idxChild = ((subCellGroup->getCellMortonIndex(idxChildCell)) & 7);
150
                FAssertLF(child[idxChild] == nullptr);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
151 152
                childData[idxChild] = subCellGroup->getUpCell(idxChildCell);
                FAssertLF(subCellGroup->getCellMortonIndex(idxChildCell) == childData[idxChild].getMortonIndex());
153
                child[idxChild] = &childData[idxChild];
BRAMAS Berenger's avatar
BRAMAS Berenger committed
154

155
                idxChildCell += 1;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
156
            }while(idxChildCell != subCellGroup->getNumberOfCellsInBlock() && cell.getMortonIndex() == (subCellGroup->getCellMortonIndex(idxChildCell)>>3));
157 158

            kernel->M2M(&cell, child, idxLevel);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
159 160 161 162 163 164

            if(currentCells->getCellMortonIndex(idxParentCell) == lastParent){
                break;
            }

            idxParentCell += 1;
165 166 167 168 169 170 171 172 173 174
        }
    }


    /////////////////////////////////////////////////////////////////////////////////////
    /// Transfer Pass Mpi
    /////////////////////////////////////////////////////////////////////////////////////
#ifdef STARPU_USE_MPI
    static void transferInoutPassCallbackMpi(void *buffers[], void *cl_arg){
        CellContainerClass currentCells((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
175 176 177 178 179 180 181
                                        STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
                                        nullptr,
                                        (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]));
        CellContainerClass externalCells((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[2]),
                                        STARPU_VARIABLE_GET_ELEMSIZE(buffers[2]),
                                        (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[3]),
                                        nullptr);
182

183
        FStarPUPtrInterface* worker = nullptr;
184 185
        int idxLevel = 0;
        const std::vector<OutOfBlockInteraction>* outsideInteractions;
186 187
        int intervalSize;
        starpu_codelet_unpack_args(cl_arg, &worker, &idxLevel, &outsideInteractions, &intervalSize);
188

189
        worker->get<ThisClass>(FSTARPU_CPU_IDX)->transferInoutPassPerformMpi(&currentCells, &externalCells, idxLevel, outsideInteractions);
190 191 192 193 194 195 196 197 198 199
    }


    /**
     * Apply M2L from the cells of an external block onto the cells of the local block.
     * An interaction is skipped when its outside cell is not found in cellsOther.
     * @param currentCells        local block, written through getDownCell
     * @param cellsOther          external block, read through getUpCell
     * @param idxLevel            level forwarded to KernelClass::M2L
     * @param outsideInteractions interactions linking both blocks
     */
    void transferInoutPassPerformMpi(CellContainerClass*const currentCells,
                                  CellContainerClass*const cellsOther,
                                  const int idxLevel,
                                  const std::vector<OutOfBlockInteraction>* outsideInteractions){
        KernelClass*const workerKernel = kernels[starpu_worker_get_id()];
        const int nbInteractions = int(outsideInteractions->size());

        for(int idxInteraction = 0 ; idxInteraction < nbInteractions ; ++idxInteraction){
            const OutOfBlockInteraction& interaction = (*outsideInteractions)[idxInteraction];

            // The position in the external block is looked up from the Morton index
            const int posInOther = cellsOther->getCellIndex(interaction.outIndex);
            if(posInOther == -1){
                continue;
            }

            CellClass sourceCell = cellsOther->getUpCell(posInOther);
            FAssertLF(sourceCell.getMortonIndex() == interaction.outIndex);
            CellClass targetCell = currentCells->getDownCell(interaction.insideIdxInBlock);
            FAssertLF(targetCell.getMortonIndex() == interaction.insideIndex);

            const CellClass* sourcePtr = &sourceCell;
            workerKernel->M2L( &targetCell , &sourcePtr, &interaction.relativeOutPosition, 1, idxLevel);
        }
    }
#endif
    /////////////////////////////////////////////////////////////////////////////////////
    /// Transfer Pass
    /////////////////////////////////////////////////////////////////////////////////////

    static void transferInPassCallback(void *buffers[], void *cl_arg){
        CellContainerClass currentCells((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
218 219 220
                                        STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
                                        (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]),
                                        (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[2]));
221

222
        FStarPUPtrInterface* worker = nullptr;
223
        int idxLevel = 0;
224 225
        int intervalSize;
        starpu_codelet_unpack_args(cl_arg, &worker, &idxLevel, &intervalSize);
226

227
        worker->get<ThisClass>(FSTARPU_CPU_IDX)->transferInPassPerform(&currentCells, idxLevel);
228 229 230 231 232 233
    }

    /**
     * Apply M2L between the cells of a single block.
     * For each cell, the interaction neighbors given by FTreeCoordinate are filtered
     * down to the ones that exist inside this block, then passed to the kernel.
     * @param currentCells block to process (sources read with getUpCell, targets with getDownCell)
     * @param idxLevel     level of the block, forwarded to the neighbor search and to M2L
     */
    void transferInPassPerform(CellContainerClass*const currentCells, const int idxLevel){
        const MortonIndex blockStartIdx = currentCells->getStartingIndex();
        const MortonIndex blockEndIdx = currentCells->getEndingIndex();
        KernelClass*const workerKernel = kernels[starpu_worker_get_id()];
        // 189 is the capacity used for the interaction list of one cell
        const CellClass* sources[189];
        CellClass sourcesStorage[189];

        for(int idxCell = 0 ; idxCell < currentCells->getNumberOfCellsInBlock() ; ++idxCell){
            CellClass targetCell = currentCells->getDownCell(idxCell);

            FAssertLF(targetCell.getMortonIndex() == currentCells->getCellMortonIndex(idxCell));

            MortonIndex candidateIndexes[189];
            int candidatePositions[189];
            const FTreeCoordinate targetCoord(targetCell.getCoordinate());
            const int nbCandidates = targetCoord.getInteractionNeighbors(idxLevel,candidateIndexes,candidatePositions);

            int nbSources = 0;

            for(int idxCandidate = 0 ; idxCandidate < nbCandidates ; ++idxCandidate){
                const MortonIndex candidate = candidateIndexes[idxCandidate];
                // Cells outside the Morton interval of this block are handled elsewhere
                if( candidate < blockStartIdx || blockEndIdx <= candidate ){
                    continue;
                }

                const int posInBlock = currentCells->getCellIndex(candidate);
                if(posInBlock == -1){
                    continue;
                }

                CellClass sourceCell = currentCells->getUpCell(posInBlock);
                FAssertLF(sourceCell.getMortonIndex() == candidate);
                // Compact in place: nbSources never exceeds idxCandidate
                candidatePositions[nbSources] = candidatePositions[idxCandidate];
                sourcesStorage[nbSources] = sourceCell;
                sources[nbSources] = &sourcesStorage[nbSources];
                ++nbSources;
            }

            workerKernel->M2L( &targetCell , sources, candidatePositions, nbSources, idxLevel);
        }
    }

    static void transferInoutPassCallback(void *buffers[], void *cl_arg){
        CellContainerClass currentCells((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
269
                                        STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
BRAMAS Berenger's avatar
BRAMAS Berenger committed
270 271 272 273 274 275
                                        nullptr,
                                        (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]));
        CellContainerClass externalCells((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[2]),
                                        STARPU_VARIABLE_GET_ELEMSIZE(buffers[2]),
                                        (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[3]),
                                        nullptr);
276

277
        FStarPUPtrInterface* worker = nullptr;
278 279
        int idxLevel = 0;
        const std::vector<OutOfBlockInteraction>* outsideInteractions;
280
        int intervalSize;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
281 282
        int mode = 0;
        starpu_codelet_unpack_args(cl_arg, &worker, &idxLevel, &outsideInteractions, &intervalSize, &mode);
283

BRAMAS Berenger's avatar
BRAMAS Berenger committed
284
        worker->get<ThisClass>(FSTARPU_CPU_IDX)->transferInoutPassPerform(&currentCells, &externalCells, idxLevel, outsideInteractions, mode);
285 286 287 288 289 290
    }


    /**
     * Apply M2L between two blocks, in one direction selected by mode.
     * In both modes the source is read from cellsOther (getUpCell) and the target
     * is written in currentCells (getDownCell); what changes is which side of the
     * interaction record each block plays.
     * @param currentCells        block holding the target cells
     * @param cellsOther          block holding the source cells
     * @param idxLevel            level forwarded to KernelClass::M2L
     * @param outsideInteractions interactions linking both blocks
     * @param mode                1: cellsOther is the "outside" block of each interaction;
     *                            any other value: cellsOther is the "inside" block and the
     *                            relative position is mirrored with getOppositeInterIndex()
     */
    void transferInoutPassPerform(CellContainerClass*const currentCells,
                                  CellContainerClass*const cellsOther,
                                  const int idxLevel,
                                  const std::vector<OutOfBlockInteraction>* outsideInteractions,
                                  const int mode){
        KernelClass*const workerKernel = kernels[starpu_worker_get_id()];
        const bool otherIsOutside = (mode == 1);
        const int nbInteractions = int(outsideInteractions->size());

        for(int idxInteraction = 0 ; idxInteraction < nbInteractions ; ++idxInteraction){
            const OutOfBlockInteraction& interaction = (*outsideInteractions)[idxInteraction];

            if(otherIsOutside){
                CellClass sourceCell = cellsOther->getUpCell(interaction.outsideIdxInBlock);
                FAssertLF(sourceCell.getMortonIndex() == interaction.outIndex);
                CellClass targetCell = currentCells->getDownCell(interaction.insideIdxInBlock);
                FAssertLF(targetCell.getMortonIndex() == interaction.insideIndex);

                const CellClass* sourcePtr = &sourceCell;
                workerKernel->M2L( &targetCell , &sourcePtr, &interaction.relativeOutPosition, 1, idxLevel);
            }
            else{
                CellClass sourceCell = cellsOther->getUpCell(interaction.insideIdxInBlock);
                FAssertLF(sourceCell.getMortonIndex() == interaction.insideIndex);
                CellClass targetCell = currentCells->getDownCell(interaction.outsideIdxInBlock);
                FAssertLF(targetCell.getMortonIndex() == interaction.outIndex);

                // Seen from the outside cell, the inside cell sits at the mirrored position
                const int mirroredPos = getOppositeInterIndex(interaction.relativeOutPosition);
                const CellClass* sourcePtr = &sourceCell;
                workerKernel->M2L( &targetCell , &sourcePtr, &mirroredPos, 1, idxLevel);
            }
        }
    }

    /////////////////////////////////////////////////////////////////////////////////////
    /// Downward Pass
    /////////////////////////////////////////////////////////////////////////////////////
    static void downardPassCallback(void *buffers[], void *cl_arg){
        CellContainerClass currentCells((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
325 326 327
                                        STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
                                        nullptr,
                                        (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]));
328

329
        FStarPUPtrInterface* worker = nullptr;
330
        int idxLevel = 0;
331
        int intervalSize;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
332
        starpu_codelet_unpack_args(cl_arg, &worker, &idxLevel, &intervalSize);
333

BRAMAS Berenger's avatar
BRAMAS Berenger committed
334 335 336 337 338
        CellContainerClass subCellGroup(
                        (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[2]),
                        STARPU_VARIABLE_GET_ELEMSIZE(buffers[2]),
                        nullptr,
                        (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[3]));
339

BRAMAS Berenger's avatar
BRAMAS Berenger committed
340
        worker->get<ThisClass>(FSTARPU_CPU_IDX)->downardPassPerform(&currentCells, &subCellGroup, idxLevel);
341 342 343
    }

    void downardPassPerform(CellContainerClass*const currentCells,
BRAMAS Berenger's avatar
BRAMAS Berenger committed
344 345
                            CellContainerClass* subCellGroup,
                            const int idxLevel){
346
        KernelClass*const kernel = kernels[starpu_worker_get_id()];
BRAMAS Berenger's avatar
BRAMAS Berenger committed
347 348 349 350 351 352 353 354

        const MortonIndex firstParent = FMath::Max(currentCells->getStartingIndex(), subCellGroup->getStartingIndex()>>3);
        const MortonIndex lastParent = FMath::Min(currentCells->getEndingIndex()-1, (subCellGroup->getEndingIndex()-1)>>3);

        int idxParentCell = currentCells->getCellIndex(firstParent);
        FAssertLF(idxParentCell != -1);

        int idxChildCell = subCellGroup->getFistChildIdx(firstParent);
355 356 357
        FAssertLF(idxChildCell != -1);
        CellClass childData[8];

BRAMAS Berenger's avatar
BRAMAS Berenger committed
358 359 360
        while(true){
            CellClass cell = currentCells->getDownCell(idxParentCell);
            FAssertLF(cell.getMortonIndex() == currentCells->getCellMortonIndex(idxParentCell));
361 362
            CellClass* child[8] = {nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr,nullptr};

BRAMAS Berenger's avatar
BRAMAS Berenger committed
363 364 365 366
            FAssertLF(cell.getMortonIndex() == (subCellGroup->getCellMortonIndex(idxChildCell)>>3));

            do{
                const int idxChild = ((subCellGroup->getCellMortonIndex(idxChildCell)) & 7);
367
                FAssertLF(child[idxChild] == nullptr);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
368 369
                childData[idxChild] = subCellGroup->getDownCell(idxChildCell);
                FAssertLF(subCellGroup->getCellMortonIndex(idxChildCell) == childData[idxChild].getMortonIndex());
370
                child[idxChild] = &childData[idxChild];
BRAMAS Berenger's avatar
BRAMAS Berenger committed
371

372
                idxChildCell += 1;
BRAMAS Berenger's avatar
BRAMAS Berenger committed
373
            }while(idxChildCell != subCellGroup->getNumberOfCellsInBlock() && cell.getMortonIndex() == (subCellGroup->getCellMortonIndex(idxChildCell)>>3));
374 375

            kernel->L2L(&cell, child, idxLevel);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
376 377 378 379 380 381

            if(currentCells->getCellMortonIndex(idxParentCell) == lastParent){
                break;
            }

            idxParentCell += 1;
382 383 384 385 386 387 388 389 390 391
        }
    }

    /////////////////////////////////////////////////////////////////////////////////////
    /// Direct Pass MPI
    /////////////////////////////////////////////////////////////////////////////////////

#ifdef STARPU_USE_MPI
    static void directInoutPassCallbackMpi(void *buffers[], void *cl_arg){
        ParticleGroupClass containers((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
392 393 394 395 396
                                      STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
                                      (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]));
        ParticleGroupClass externalContainers((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[2]),
                                      STARPU_VARIABLE_GET_ELEMSIZE(buffers[2]),
                                      nullptr);
397

398
        FStarPUPtrInterface* worker = nullptr;
399
        const std::vector<OutOfBlockInteraction>* outsideInteractions = nullptr;
400 401
        int intervalSize;
        starpu_codelet_unpack_args(cl_arg, &worker, &outsideInteractions, &intervalSize);
402

BRAMAS Berenger's avatar
BRAMAS Berenger committed
403
        worker->get<ThisClass>(FSTARPU_CPU_IDX)->directInoutPassPerformMpi(&containers, &externalContainers, outsideInteractions);
404 405 406 407 408 409
    }

    /**
     * Apply P2PRemote from the leaves of an external block onto the leaves of the local block.
     * An interaction is skipped when its outside leaf is not found in containersOther.
     * @param containers          local particle block (target leaves)
     * @param containersOther     external particle block (source leaves)
     * @param outsideInteractions interactions linking both blocks
     */
    void directInoutPassPerformMpi(ParticleGroupClass* containers, ParticleGroupClass* containersOther,
                                const std::vector<OutOfBlockInteraction>* outsideInteractions){
        KernelClass*const kernel = kernels[starpu_worker_get_id()];
        for(int outInterIdx = 0 ; outInterIdx < int(outsideInteractions->size()) ; ++outInterIdx){
            const OutOfBlockInteraction& interaction = (*outsideInteractions)[outInterIdx];

            // The position in the external block is looked up from the Morton index
            const int leafPos = containersOther->getLeafIndex(interaction.outIndex);
            if(leafPos != -1){
                ParticleContainerClass interParticles = containersOther->template getLeaf<ParticleContainerClass>(leafPos);
                FAssertLF(containersOther->getLeafMortonIndex(leafPos) == interaction.outIndex);
                ParticleContainerClass particles = containers->template getLeaf<ParticleContainerClass>(interaction.insideIdxInBlock);
                // leafPos is a position in containersOther; the local leaf must be
                // checked at its own position, insideIdxInBlock.
                FAssertLF(containers->getLeafMortonIndex(interaction.insideIdxInBlock) == interaction.insideIndex);
                ParticleContainerClass* ptrLeaf = &interParticles;
                kernel->P2PRemote( FTreeCoordinate(interaction.insideIndex, treeHeight-1), &particles, &particles ,
                                   &ptrLeaf, &interaction.relativeOutPosition, 1);
            }
        }
    }
#endif
    /////////////////////////////////////////////////////////////////////////////////////
    /// Direct Pass
    /////////////////////////////////////////////////////////////////////////////////////

    static void directInPassCallback(void *buffers[], void *cl_arg){
        ParticleGroupClass containers((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
429 430
                                      STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
                                      (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]));
431

432
        FStarPUPtrInterface* worker = nullptr;
433 434
        int intervalSize;
        starpu_codelet_unpack_args(cl_arg, &worker, &intervalSize);
435
        worker->get<ThisClass>(FSTARPU_CPU_IDX)->directInPassPerform(&containers);
436 437 438 439 440 441 442
    }

    /**
     * Apply P2P between the leaves of a single particle block.
     * For each leaf, the neighbor indexes given by FTreeCoordinate are filtered down
     * to the leaves that exist inside this block, then passed to the kernel.
     * @param containers particle block to process
     */
    void directInPassPerform(ParticleGroupClass* containers){
        const MortonIndex blockStartIdx = containers->getStartingIndex();
        const MortonIndex blockEndIdx = containers->getEndingIndex();
        KernelClass*const workerKernel = kernels[starpu_worker_get_id()];

        for(int idxLeaf = 0 ; idxLeaf < containers->getNumberOfLeavesInBlock() ; ++idxLeaf){
            ParticleContainerClass targetParticles = containers->template getLeaf<ParticleContainerClass>(idxLeaf);

            // 26 is the capacity used for the neighbor list of one leaf
            MortonIndex candidateIndexes[26];
            int candidatePositions[26];
            FTreeCoordinate leafCoord(containers->getLeafMortonIndex(idxLeaf), treeHeight-1);
            const int nbCandidates = leafCoord.getNeighborsIndexes(treeHeight,candidateIndexes,candidatePositions);

            ParticleContainerClass neighborsStorage[26];
            ParticleContainerClass* neighbors[26];
            int nbNeighbors = 0;

            for(int idxCandidate = 0 ; idxCandidate < nbCandidates ; ++idxCandidate){
                const MortonIndex candidate = candidateIndexes[idxCandidate];
                // Leaves outside the Morton interval of this block are handled elsewhere
                if( candidate < blockStartIdx || blockEndIdx <= candidate ){
                    continue;
                }

                const int posInBlock = containers->getLeafIndex(candidate);
                if(posInBlock == -1){
                    continue;
                }

                neighborsStorage[nbNeighbors] = containers->template getLeaf<ParticleContainerClass>(posInBlock);
                // Compact in place: nbNeighbors never exceeds idxCandidate
                candidatePositions[nbNeighbors] = candidatePositions[idxCandidate];
                neighbors[nbNeighbors] = &neighborsStorage[nbNeighbors];
                ++nbNeighbors;
            }

            workerKernel->P2P( leafCoord, &targetParticles, &targetParticles , neighbors, candidatePositions, nbNeighbors);
        }
    }

    static void directInoutPassCallback(void *buffers[], void *cl_arg){
        ParticleGroupClass containers((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
473 474 475 476 477
                                      STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
                                      (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]));
        ParticleGroupClass externalContainers((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[2]),
                                      STARPU_VARIABLE_GET_ELEMSIZE(buffers[2]),
                                      (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[3]));
478

479
        FStarPUPtrInterface* worker = nullptr;
480
        const std::vector<OutOfBlockInteraction>* outsideInteractions = nullptr;
481 482
        int intervalSize;
        starpu_codelet_unpack_args(cl_arg, &worker, &outsideInteractions, &intervalSize);
483

484
        worker->get<ThisClass>(FSTARPU_CPU_IDX)->directInoutPassPerform(&containers, &externalContainers, outsideInteractions);
485 486 487 488
    }

    /**
     * Apply the mutual direct interactions between the leaves of two particle blocks.
     * Each interaction is computed in both directions with P2POuter: first the outside
     * leaf acts on the inside leaf, then the inside leaf acts on the outside leaf at the
     * mirrored relative position.
     * @param containers          block holding the "inside" leaves of the interactions
     * @param containersOther     block holding the "outside" leaves of the interactions
     * @param outsideInteractions interactions linking both blocks
     */
    void directInoutPassPerform(ParticleGroupClass* containers, ParticleGroupClass* containersOther,
                                const std::vector<OutOfBlockInteraction>* outsideInteractions){
        // The kernels array is filled per StarPU worker (see initKernel) and every other
        // pass selects its kernel with the StarPU worker id; the OpenMP thread number
        // used here before does not identify the StarPU worker running this task.
        KernelClass*const kernel = kernels[starpu_worker_get_id()];
        for(int outInterIdx = 0 ; outInterIdx < int(outsideInteractions->size()) ; ++outInterIdx){
            const OutOfBlockInteraction& interaction = (*outsideInteractions)[outInterIdx];

            ParticleContainerClass interParticles = containersOther->template getLeaf<ParticleContainerClass>(interaction.outsideIdxInBlock);
            ParticleContainerClass particles = containers->template getLeaf<ParticleContainerClass>(interaction.insideIdxInBlock);

            FAssertLF(containersOther->getLeafMortonIndex(interaction.outsideIdxInBlock) == interaction.outIndex);
            FAssertLF(containers->getLeafMortonIndex(interaction.insideIdxInBlock) == interaction.insideIndex);

            // Outside leaf -> inside leaf
            ParticleContainerClass* ptrLeaf = &interParticles;
            kernel->P2POuter( FTreeCoordinate(interaction.insideIndex, treeHeight-1),
                                &particles , &ptrLeaf, &interaction.relativeOutPosition, 1);
            // Inside leaf -> outside leaf, seen from the mirrored neighbor position
            const int otherPosition = getOppositeNeighIndex(interaction.relativeOutPosition);
            ptrLeaf = &particles;
            kernel->P2POuter( FTreeCoordinate(interaction.outIndex, treeHeight-1),
                                    &interParticles , &ptrLeaf, &otherPosition, 1);
        }
    }

    /////////////////////////////////////////////////////////////////////////////////////
    /// Merge Pass
    /////////////////////////////////////////////////////////////////////////////////////

    static void mergePassCallback(void *buffers[], void *cl_arg){
        CellContainerClass leafCells((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
513 514 515 516 517 518
                                     STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
                                     nullptr,
                                     (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]));
        ParticleGroupClass containers((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[2]),
                                     STARPU_VARIABLE_GET_ELEMSIZE(buffers[2]),
                                     (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[3]));
519

520
        FStarPUPtrInterface* worker = nullptr;
521 522
        int intervalSize;
        starpu_codelet_unpack_args(cl_arg, &worker, &intervalSize);
523
        worker->get<ThisClass>(FSTARPU_CPU_IDX)->mergePassPerform(&leafCells, &containers);
524 525 526
    }

    /**
     * Apply L2P on every leaf of a block with the kernel of the calling StarPU worker.
     * @param leafCells  cell group of the leaf level
     * @param containers particle group; must hold as many leaves as leafCells holds cells,
     *                   with matching Morton indexes at each position
     */
    void mergePassPerform(CellContainerClass* leafCells, ParticleGroupClass* containers){
        FAssertLF(leafCells->getNumberOfCellsInBlock() == containers->getNumberOfLeavesInBlock());
        KernelClass*const workerKernel = kernels[starpu_worker_get_id()];

        for(int idxLeaf = 0 ; idxLeaf < leafCells->getNumberOfCellsInBlock() ; ++idxLeaf){
            CellClass downCell = leafCells->getDownCell(idxLeaf);
            FAssertLF(downCell.getMortonIndex() == leafCells->getCellMortonIndex(idxLeaf));
            ParticleContainerClass leafParticles = containers->template getLeaf<ParticleContainerClass>(idxLeaf);
            // Cells and leaves are expected to be stored in the same order
            FAssertLF(leafCells->getCellMortonIndex(idxLeaf) == containers->getLeafMortonIndex(idxLeaf));
            workerKernel->L2P(&downCell, &leafParticles);
        }
    }

    /**
     * Mirror a direct-neighbor position.
     * Positions are encoded as ((idxX+1)*3 + (idxY+1)) * 3 + (idxZ+1), i.e. in [0, 26];
     * negating the three offsets maps a position p to 26 - p.
     * @param index encoded neighbor position
     * @return the encoded position of the opposite neighbor
     */
    static int getOppositeNeighIndex(const int index) {
        const int lastNeighIndex = 27 - 1;
        return lastNeighIndex - index;
    }

    /**
     * Mirror an M2L interaction position.
     * Positions are encoded as (((xdiff+3) * 7) + (ydiff+3)) * 7 + zdiff + 3, i.e. in [0, 342];
     * negating the three offsets maps a position p to 342 - p.
     * @param index encoded interaction position
     * @return the encoded position of the opposite interaction
     */
    static int getOppositeInterIndex(const int index) {
        const int lastInterIndex = 343 - 1;
        return lastInterIndex - index;
    }
};

#endif // FSTARPUCPUWRAPPER_HPP