Commit 5394efd2 authored by BRAMAS Berenger

Pass the interval size from the task submission to the opencl or cuda kernel...

Pass the interval size from the task submission to the OpenCL or CUDA kernel call (so that the kernel is launched with the right number of thread blocks)
parent f964d036
......@@ -70,11 +70,13 @@ protected:
starpu_data_handle_t symb;
starpu_data_handle_t up;
starpu_data_handle_t down;
int intervalSize;
};
// StarPU data handles and launch metadata kept for one particle group.
struct ParticleHandles{
// NOTE(review): presumably the handle registered on the container's raw
// (symbolic) buffer -- the registration call is cut off in this view; confirm.
starpu_data_handle_t symb;
// Handle registered on the container's raw attributes buffer.
starpu_data_handle_t down;
// Ending index minus starting index of the particle container, narrowed to
// int; passed by value (STARPU_VALUE) to the P2P tasks.
int intervalSize;
};
std::vector< std::vector< std::vector<BlockInteractions<CellContainerClass>>>> externalInteractionsAllLevel;
......@@ -492,6 +494,7 @@ protected:
(uintptr_t)currentCells->getRawMultipoleBuffer(), currentCells->getMultipoleBufferSizeInByte());
starpu_variable_data_register(&cellHandles[idxLevel][idxGroup].down, 0,
(uintptr_t)currentCells->getRawLocalBuffer(), currentCells->getLocalBufferSizeInByte());
cellHandles[idxLevel][idxGroup].intervalSize = int(currentCells->getEndingIndex() - currentCells->getStartingIndex());
}
}
{
......@@ -502,6 +505,7 @@ protected:
(uintptr_t)containers->getRawBuffer(), containers->getBufferSizeInByte());
starpu_variable_data_register(&particleHandles[idxGroup].down, 0,
(uintptr_t)containers->getRawAttributesBuffer(), containers->getAttributesBufferSizeInByte());
particleHandles[idxGroup].intervalSize = int(containers->getEndingIndex() - containers->getStartingIndex());
}
}
}
......@@ -691,6 +695,7 @@ protected:
for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
starpu_insert_task(&p2m_cl,
STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
STARPU_VALUE, &cellHandles[tree->getHeight()-1][idxGroup].intervalSize, sizeof(int),
STARPU_PRIORITY, FStarPUFmmPriorities::Controller().getPrioP2M(),
STARPU_R, cellHandles[tree->getHeight()-1][idxGroup].symb,
STARPU_RW, cellHandles[tree->getHeight()-1][idxGroup].up,
......@@ -750,6 +755,7 @@ protected:
STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
STARPU_VALUE, &nbSubCellGroups, sizeof(nbSubCellGroups),
STARPU_VALUE, &idxLevel, sizeof(idxLevel),
STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
0);
task->cl_arg = arg_buffer;
task->cl_arg_size = arg_buffer_size;
......@@ -773,6 +779,7 @@ protected:
starpu_insert_task(&m2l_cl_in,
STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
STARPU_VALUE, &idxLevel, sizeof(idxLevel),
STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
STARPU_PRIORITY, FStarPUFmmPriorities::Controller().getPrioM2L(idxLevel),
STARPU_R, cellHandles[idxLevel][idxGroup].symb,
STARPU_R, cellHandles[idxLevel][idxGroup].up,
......@@ -791,6 +798,7 @@ protected:
STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
STARPU_VALUE, &idxLevel, sizeof(idxLevel),
STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions),
STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
STARPU_PRIORITY, FStarPUFmmPriorities::Controller().getPrioM2LExtern(idxLevel),
STARPU_R, cellHandles[idxLevel][idxGroup].symb,
STARPU_R, cellHandles[idxLevel][idxGroup].up,
......@@ -855,6 +863,7 @@ protected:
STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
STARPU_VALUE, &nbSubCellGroups, sizeof(nbSubCellGroups),
STARPU_VALUE, &idxLevel, sizeof(idxLevel),
STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
0);
task->cl_arg = arg_buffer;
task->cl_arg_size = arg_buffer_size;
......@@ -877,6 +886,7 @@ protected:
for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
starpu_insert_task(&p2p_cl_in,
STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
STARPU_VALUE, &particleHandles[idxGroup].intervalSize, sizeof(int),
STARPU_PRIORITY, FStarPUFmmPriorities::Controller().getPrioP2P(),
STARPU_R, particleHandles[idxGroup].symb,
(STARPU_RW|STARPU_COMMUTE), particleHandles[idxGroup].down,
......@@ -891,6 +901,7 @@ protected:
starpu_insert_task(&p2p_cl_inout,
STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions),
STARPU_VALUE, &particleHandles[idxGroup].intervalSize, sizeof(int),
STARPU_PRIORITY, FStarPUFmmPriorities::Controller().getPrioP2PExtern(),
STARPU_R, particleHandles[idxGroup].symb,
(STARPU_RW|STARPU_COMMUTE), particleHandles[idxGroup].down,
......@@ -915,6 +926,7 @@ protected:
for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
starpu_insert_task(&l2p_cl,
STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
STARPU_VALUE, &cellHandles[tree->getHeight()-1][idxGroup].intervalSize, sizeof(int),
STARPU_PRIORITY, FStarPUFmmPriorities::Controller().getPrioL2P(),
STARPU_R, cellHandles[tree->getHeight()-1][idxGroup].symb,
STARPU_R, cellHandles[tree->getHeight()-1][idxGroup].down,
......
......@@ -85,11 +85,13 @@ protected:
starpu_data_handle_t symb;
starpu_data_handle_t up;
starpu_data_handle_t down;
int intervalSize;
};
// StarPU data handles and launch metadata kept for one particle group.
struct ParticleHandles{
// NOTE(review): presumably the handle registered on the container's raw
// (symbolic) buffer -- the registration call is cut off in this view; confirm.
starpu_data_handle_t symb;
// Handle registered on the container's raw attributes buffer.
starpu_data_handle_t down;
// Ending index minus starting index of the particle container, narrowed to
// int; passed by value (STARPU_VALUE) to the P2P tasks, including the MPI one.
int intervalSize;
};
std::vector< std::vector< std::vector<BlockInteractions<CellContainerClass>>>> externalInteractionsAllLevel;
......@@ -1079,6 +1081,7 @@ protected:
(uintptr_t)currentCells->getRawMultipoleBuffer(), currentCells->getMultipoleBufferSizeInByte());
starpu_variable_data_register(&cellHandles[idxLevel][idxGroup].down, 0,
(uintptr_t)currentCells->getRawLocalBuffer(), currentCells->getLocalBufferSizeInByte());
cellHandles[idxLevel][idxGroup].intervalSize = int(currentCells->getEndingIndex() - currentCells->getStartingIndex());
}
}
{
......@@ -1089,6 +1092,7 @@ protected:
(uintptr_t)containers->getRawBuffer(), containers->getBufferSizeInByte());
starpu_variable_data_register(&particleHandles[idxGroup].down, 0,
(uintptr_t)containers->getRawAttributesBuffer(), containers->getAttributesBufferSizeInByte());
particleHandles[idxGroup].intervalSize = int(containers->getEndingIndex() - containers->getStartingIndex());
}
}
}
......@@ -1278,6 +1282,7 @@ protected:
for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
starpu_insert_task(&p2m_cl,
STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
STARPU_VALUE, &cellHandles[tree->getHeight()-1][idxGroup].intervalSize, sizeof(int),
STARPU_PRIORITY, FStarPUFmmPriorities::Controller().getPrioP2M(),
STARPU_R, cellHandles[tree->getHeight()-1][idxGroup].symb,
STARPU_RW, cellHandles[tree->getHeight()-1][idxGroup].up,
......@@ -1338,6 +1343,7 @@ protected:
STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
STARPU_VALUE, &nbSubCellGroups, sizeof(nbSubCellGroups),
STARPU_VALUE, &idxLevel, sizeof(idxLevel),
STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
0);
task->cl_arg = arg_buffer;
task->cl_arg_size = arg_buffer_size;
......@@ -1418,6 +1424,7 @@ protected:
STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
STARPU_VALUE, &nbSubCellGroups, sizeof(nbSubCellGroups),
STARPU_VALUE, &idxLevel, sizeof(idxLevel),
STARPU_VALUE, &cellHandles[idxLevel][tree->getNbCellGroupAtLevel(idxLevel)-1].intervalSize, sizeof(int),
0);
task->cl_arg = arg_buffer;
task->cl_arg_size = arg_buffer_size;
......@@ -1481,6 +1488,7 @@ protected:
STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
STARPU_VALUE, &idxLevel, sizeof(idxLevel),
STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions),
STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
STARPU_PRIORITY, FStarPUFmmPriorities::Controller().getPrioM2LMpi(idxLevel),
STARPU_R, cellHandles[idxLevel][idxGroup].symb,
(STARPU_RW|STARPU_COMMUTE), cellHandles[idxLevel][idxGroup].down,
......@@ -1506,6 +1514,7 @@ protected:
starpu_insert_task(&m2l_cl_in,
STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
STARPU_VALUE, &idxLevel, sizeof(idxLevel),
STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
STARPU_PRIORITY, FStarPUFmmPriorities::Controller().getPrioM2L(idxLevel),
STARPU_R, cellHandles[idxLevel][idxGroup].symb,
STARPU_R, cellHandles[idxLevel][idxGroup].up,
......@@ -1524,6 +1533,7 @@ protected:
STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
STARPU_VALUE, &idxLevel, sizeof(idxLevel),
STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions),
STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
STARPU_PRIORITY, FStarPUFmmPriorities::Controller().getPrioM2LExtern(idxLevel),
STARPU_R, cellHandles[idxLevel][idxGroup].symb,
STARPU_R, cellHandles[idxLevel][idxGroup].up,
......@@ -1682,6 +1692,7 @@ protected:
STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
STARPU_VALUE, &nbSubCellGroups, sizeof(nbSubCellGroups),
STARPU_VALUE, &idxLevel, sizeof(idxLevel),
STARPU_VALUE, &remoteCellGroups[idxLevel][firstOtherBlock].intervalSize, sizeof(int),// TODO !
0);
task->cl_arg = arg_buffer;
task->cl_arg_size = arg_buffer_size;
......@@ -1735,6 +1746,7 @@ protected:
STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
STARPU_VALUE, &nbSubCellGroups, sizeof(nbSubCellGroups),
STARPU_VALUE, &idxLevel, sizeof(idxLevel),
STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
0);
task->cl_arg = arg_buffer;
task->cl_arg_size = arg_buffer_size;
......@@ -1758,6 +1770,7 @@ protected:
starpu_insert_task(&p2p_cl_inout_mpi,
STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions),
STARPU_VALUE, &particleHandles[idxGroup].intervalSize, sizeof(int),
STARPU_PRIORITY, FStarPUFmmPriorities::Controller().getPrioP2PMpi(),
STARPU_R, particleHandles[idxGroup].symb,
(STARPU_RW|STARPU_COMMUTE), particleHandles[idxGroup].down,
......@@ -1781,6 +1794,7 @@ protected:
for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
starpu_insert_task(&p2p_cl_in,
STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
STARPU_VALUE, &particleHandles[idxGroup].intervalSize, sizeof(int),
STARPU_PRIORITY, FStarPUFmmPriorities::Controller().getPrioP2P(),
STARPU_R, particleHandles[idxGroup].symb,
(STARPU_RW|STARPU_COMMUTE), particleHandles[idxGroup].down,
......@@ -1795,6 +1809,7 @@ protected:
starpu_insert_task(&p2p_cl_inout,
STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions),
STARPU_VALUE, &particleHandles[idxGroup].intervalSize, sizeof(int),
STARPU_PRIORITY, FStarPUFmmPriorities::Controller().getPrioP2PExtern(),
STARPU_R, particleHandles[idxGroup].symb,
(STARPU_RW|STARPU_COMMUTE), particleHandles[idxGroup].down,
......@@ -1819,6 +1834,7 @@ protected:
for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
starpu_insert_task(&l2p_cl,
STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
STARPU_VALUE, &cellHandles[tree->getHeight()-1][idxGroup].intervalSize, sizeof(int),
STARPU_PRIORITY, FStarPUFmmPriorities::Controller().getPrioL2P(),
STARPU_R, cellHandles[tree->getHeight()-1][idxGroup].symb,
STARPU_R, cellHandles[tree->getHeight()-1][idxGroup].down,
......
This diff is collapsed.
......@@ -11,7 +11,8 @@ template <class SymboleCellClass, class PoleCellClass, class LocalCellClass,
class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__bottomPassCallback(unsigned char* leafCellsPtr, std::size_t leafCellsSize, unsigned char* leafCellsUpPtr,
unsigned char* containersPtr, std::size_t containersSize,
CudaKernelClass* kernel, cudaStream_t currentStream);
CudaKernelClass* kernel, cudaStream_t currentStream,
const dim3 inGridSize, const dim3 inBlocksSize);
template <class SymboleCellClass, class PoleCellClass, class LocalCellClass,
class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
......@@ -19,7 +20,8 @@ void FCuda__upwardPassCallback(
unsigned char* currentCellsPtr, std::size_t currentCellsSize, unsigned char* currentCellsUpPtr,
FCudaParams<unsigned char*,9> subCellGroupsPtr, FCudaParams<std::size_t, 9> subCellGroupsSize,
FCudaParams<unsigned char*,9> subCellGroupsUpPtr,
int nbSubCellGroups, int idxLevel, CudaKernelClass* kernel, cudaStream_t currentStream);
int nbSubCellGroups, int idxLevel, CudaKernelClass* kernel, cudaStream_t currentStream,
const dim3 inGridSize, const dim3 inBlocksSize);
#ifdef SCALFMM_USE_MPI
template <class SymboleCellClass, class PoleCellClass, class LocalCellClass,
class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
......@@ -27,14 +29,16 @@ void FCuda__transferInoutPassCallbackMpi(
unsigned char* currentCellsPtr, std::size_t currentCellsSize, unsigned char* currentCellsDownPtr,
unsigned char* externalCellsPtr, std::size_t externalCellsSize, unsigned char* externalCellsUpPtr,
int idxLevel, const OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions, CudaKernelClass* kernel, cudaStream_t currentStream);
int nbOutsideInteractions, CudaKernelClass* kernel, cudaStream_t currentStream,
const dim3 inGridSize, const dim3 inBlocksSize);
#endif
template <class SymboleCellClass, class PoleCellClass, class LocalCellClass,
class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__transferInPassCallback(
unsigned char* currentCellsPtr, std::size_t currentCellsSize,
unsigned char* currentCellsUpPtr, unsigned char* currentCellsDownPtr,
int idxLevel, CudaKernelClass* kernel, cudaStream_t currentStream);
int idxLevel, CudaKernelClass* kernel, cudaStream_t currentStream,
const dim3 inGridSize, const dim3 inBlocksSize);
template <class SymboleCellClass, class PoleCellClass, class LocalCellClass,
class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
......@@ -44,7 +48,8 @@ void FCuda__transferInoutPassCallback(
unsigned char* externalCellsPtr, std::size_t externalCellsSize,
unsigned char* externalCellsUpPtr, unsigned char* externalCellsDownPtr,
int idxLevel, const OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions, CudaKernelClass* kernel, cudaStream_t currentStream);
int nbOutsideInteractions, CudaKernelClass* kernel, cudaStream_t currentStream,
const dim3 inGridSize, const dim3 inBlocksSize);
template <class SymboleCellClass, class PoleCellClass, class LocalCellClass,
class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
......@@ -52,7 +57,8 @@ void FCuda__downardPassCallback(
unsigned char* currentCellsPtr, std::size_t currentCellsSize, unsigned char* currentCellsDownPtr,
FCudaParams<unsigned char*,9> subCellGroupsPtr, FCudaParams<std::size_t,9> subCellGroupsSize,
FCudaParams<unsigned char*,9> subCellGroupsDownPtr,
int nbSubCellGroups, int idxLevel, CudaKernelClass* kernel, cudaStream_t currentStream);
int nbSubCellGroups, int idxLevel, CudaKernelClass* kernel, cudaStream_t currentStream,
const dim3 inGridSize, const dim3 inBlocksSize);
#ifdef SCALFMM_USE_MPI
template <class SymboleCellClass, class PoleCellClass, class LocalCellClass,
class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
......@@ -60,13 +66,15 @@ void FCuda__directInoutPassCallbackMpi(
unsigned char* containersPtr, std::size_t containersSize, unsigned char* containersDownPtr,
unsigned char* externalContainersPtr, std::size_t externalContainersSize,
const OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions, const int treeHeight, CudaKernelClass* kernel, cudaStream_t currentStream);
int nbOutsideInteractions, const int treeHeight, CudaKernelClass* kernel, cudaStream_t currentStream,
const dim3 inGridSize, const dim3 inBlocksSize);
#endif
template <class SymboleCellClass, class PoleCellClass, class LocalCellClass,
class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__directInPassCallback(
unsigned char* containersPtr, std::size_t containersSize, unsigned char* containersDownPtr,
const int treeHeight, CudaKernelClass* kernel, cudaStream_t currentStream);
const int treeHeight, CudaKernelClass* kernel, cudaStream_t currentStream,
const dim3 inGridSize, const dim3 inBlocksSize);
template <class SymboleCellClass, class PoleCellClass, class LocalCellClass,
class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
......@@ -74,14 +82,16 @@ void FCuda__directInoutPassCallback(
unsigned char* containersPtr, std::size_t containersSize, unsigned char* containersDownPtr,
unsigned char* externalContainersPtr, std::size_t externalContainersSize, unsigned char* externalContainersDownPtr,
const OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions, const int treeHeight, CudaKernelClass* kernel, cudaStream_t currentStream);
int nbOutsideInteractions, const int treeHeight, CudaKernelClass* kernel, cudaStream_t currentStream,
const dim3 inGridSize, const dim3 inBlocksSize);
template <class SymboleCellClass, class PoleCellClass, class LocalCellClass,
class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__mergePassCallback(
unsigned char* leafCellsPtr, std::size_t leafCellsSize, unsigned char* leafCellsDownPtr,
unsigned char* containersPtr, std::size_t containersSize, unsigned char* containersDownPtr,
CudaKernelClass* kernel, cudaStream_t currentStream);
CudaKernelClass* kernel, cudaStream_t currentStream,
const dim3 inGridSize, const dim3 inBlocksSize);
template <class CudaKernelClass>
CudaKernelClass* FCuda__BuildCudaKernel(void*);
......@@ -89,4 +99,10 @@ CudaKernelClass* FCuda__BuildCudaKernel(void*);
template <class CudaKernelClass>
void FCuda__ReleaseCudaKernel(CudaKernelClass*);
template <class CudaKernelClass>
dim3 FCuda__GetGridSize(CudaKernelClass* kernel, int intervalSize);
template <class CudaKernelClass>
dim3 FCuda__GetBlockSize(CudaKernelClass* kernel);
#endif
......@@ -50,6 +50,14 @@ public:
// Host-side release hook for the empty kernel: the pointer argument is
// ignored and nothing is freed.
__host__ static void ReleaseKernel(FCudaEmptyKernel* /*todealloc*/){
// nothing to do
}
// Grid dimensions reported by the empty kernel. The interval size is ignored;
// the x extent is 0 while y and z keep dim3's default of 1.
__host__ static dim3 GetGridSize(const int /*intervalSize*/){
    return dim3(0, 1, 1);
}
// Block dimensions reported by the empty kernel: the x extent is 0 while
// y and z keep dim3's default of 1.
__host__ static dim3 GetBlocksSize(){
    return dim3(0, 1, 1);
}
};
#endif // FCUDAEMPTYKERNEL_HPP
......
......@@ -83,7 +83,7 @@ public:
return 0;
}
const size_t* getNbGroups() const {
const size_t* getNbGroups(const int /*inSizeInterval*/) const {
return nullptr;
}
......
......@@ -82,7 +82,8 @@ public:
nullptr);
FStarPUPtrInterface* worker = nullptr;
starpu_codelet_unpack_args(cl_arg, &worker);
int intervalSize;
starpu_codelet_unpack_args(cl_arg, &worker, &intervalSize);
worker->get<ThisClass>(FSTARPU_CPU_IDX)->bottomPassPerform(&leafCells, &containers);
}
......@@ -115,7 +116,8 @@ public:
FStarPUPtrInterface* worker = nullptr;
int nbSubCellGroups = 0;
int idxLevel = 0;
starpu_codelet_unpack_args(cl_arg, &worker, &nbSubCellGroups, &idxLevel);
int intervalSize;
starpu_codelet_unpack_args(cl_arg, &worker, &nbSubCellGroups, &idxLevel, &intervalSize);
CellContainerClass* subCellGroups[9];
memset(subCellGroups, 0, 9*sizeof(CellContainerClass*));
......@@ -189,7 +191,8 @@ public:
FStarPUPtrInterface* worker = nullptr;
int idxLevel = 0;
const std::vector<OutOfBlockInteraction>* outsideInteractions;
starpu_codelet_unpack_args(cl_arg, &worker, &idxLevel, &outsideInteractions);
int intervalSize;
starpu_codelet_unpack_args(cl_arg, &worker, &idxLevel, &outsideInteractions, &intervalSize);
worker->get<ThisClass>(FSTARPU_CPU_IDX)->transferInoutPassPerformMpi(&currentCells, &externalCells, idxLevel, outsideInteractions);
}
......@@ -229,7 +232,8 @@ public:
FStarPUPtrInterface* worker = nullptr;
int idxLevel = 0;
starpu_codelet_unpack_args(cl_arg, &worker, &idxLevel);
int intervalSize;
starpu_codelet_unpack_args(cl_arg, &worker, &idxLevel, &intervalSize);
worker->get<ThisClass>(FSTARPU_CPU_IDX)->transferInPassPerform(&currentCells, idxLevel);
}
......@@ -283,7 +287,8 @@ public:
FStarPUPtrInterface* worker = nullptr;
int idxLevel = 0;
const std::vector<OutOfBlockInteraction>* outsideInteractions;
starpu_codelet_unpack_args(cl_arg, &worker, &idxLevel, &outsideInteractions);
int intervalSize;
starpu_codelet_unpack_args(cl_arg, &worker, &idxLevel, &outsideInteractions, &intervalSize);
worker->get<ThisClass>(FSTARPU_CPU_IDX)->transferInoutPassPerform(&currentCells, &externalCells, idxLevel, outsideInteractions);
}
......@@ -327,7 +332,8 @@ public:
FStarPUPtrInterface* worker = nullptr;
int nbSubCellGroups = 0;
int idxLevel = 0;
starpu_codelet_unpack_args(cl_arg, &worker, &nbSubCellGroups, &idxLevel);
int intervalSize;
starpu_codelet_unpack_args(cl_arg, &worker, &nbSubCellGroups, &idxLevel, &intervalSize);
CellContainerClass* subCellGroups[9];
memset(subCellGroups, 0, 9*sizeof(CellContainerClass*));
......@@ -398,7 +404,8 @@ public:
FStarPUPtrInterface* worker = nullptr;
const std::vector<OutOfBlockInteraction>* outsideInteractions = nullptr;
starpu_codelet_unpack_args(cl_arg, &worker, &outsideInteractions);
int intervalSize;
starpu_codelet_unpack_args(cl_arg, &worker, &outsideInteractions, &intervalSize);
worker->get<ThisClass>(FSTARPU_CPU_IDX)->directInoutPassPerform(&containers, &externalContainers, outsideInteractions);
}
......@@ -430,7 +437,8 @@ public:
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]));
FStarPUPtrInterface* worker = nullptr;
starpu_codelet_unpack_args(cl_arg, &worker);
int intervalSize;
starpu_codelet_unpack_args(cl_arg, &worker, &intervalSize);
worker->get<ThisClass>(FSTARPU_CPU_IDX)->directInPassPerform(&containers);
}
......@@ -478,7 +486,8 @@ public:
FStarPUPtrInterface* worker = nullptr;
const std::vector<OutOfBlockInteraction>* outsideInteractions = nullptr;
starpu_codelet_unpack_args(cl_arg, &worker, &outsideInteractions);
int intervalSize;
starpu_codelet_unpack_args(cl_arg, &worker, &outsideInteractions, &intervalSize);
worker->get<ThisClass>(FSTARPU_CPU_IDX)->directInoutPassPerform(&containers, &externalContainers, outsideInteractions);
}
......@@ -518,7 +527,8 @@ public:
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[3]));
FStarPUPtrInterface* worker = nullptr;
starpu_codelet_unpack_args(cl_arg, &worker);
int intervalSize;
starpu_codelet_unpack_args(cl_arg, &worker, &intervalSize);
worker->get<ThisClass>(FSTARPU_CPU_IDX)->mergePassPerform(&leafCells, &containers);
}
......
......@@ -73,7 +73,8 @@ public:
static void bottomPassCallback(void *buffers[], void *cl_arg){
FStarPUPtrInterface* worker = nullptr;
starpu_codelet_unpack_args(cl_arg, &worker);
// Unpack the by-value task arguments in packing order: the worker pointer
// first, then the interval size. Each packed value gets exactly one
// destination pointer.
int intervalSize = 0;
starpu_codelet_unpack_args(cl_arg, &worker, &intervalSize);
CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CPU_IDX)->kernels[starpu_worker_get_id()];
......@@ -84,7 +85,8 @@ public:
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]),
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[2]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[2]),
kernel, starpu_cuda_get_local_stream());
kernel, starpu_cuda_get_local_stream(),
FCuda__GetGridSize(kernel,intervalSize),FCuda__GetBlockSize(kernel));
}
/////////////////////////////////////////////////////////////////////////////////////
......@@ -95,7 +97,8 @@ public:
FStarPUPtrInterface* worker = nullptr;
int nbSubCellGroups = 0;
int idxLevel = 0;
starpu_codelet_unpack_args(cl_arg, &worker, &nbSubCellGroups, &idxLevel);
int intervalSize;
starpu_codelet_unpack_args(cl_arg, &worker, &nbSubCellGroups, &idxLevel, &intervalSize);
FCudaParams<unsigned char*,9> subCellGroupsPtr;
memset(&subCellGroupsPtr, 0, sizeof(subCellGroupsPtr));
......@@ -117,7 +120,8 @@ public:
STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]),
subCellGroupsPtr,subCellGroupsSize,subCellGroupsUpPtr,
nbSubCellGroups, idxLevel, kernel, starpu_cuda_get_local_stream());
nbSubCellGroups, idxLevel, kernel, starpu_cuda_get_local_stream(),
FCuda__GetGridSize(kernel,intervalSize),FCuda__GetBlockSize(kernel));
}
/////////////////////////////////////////////////////////////////////////////////////
......@@ -128,7 +132,8 @@ public:
FStarPUPtrInterface* worker = nullptr;
int idxLevel = 0;
const std::vector<OutOfBlockInteraction>* outsideInteractions;
starpu_codelet_unpack_args(cl_arg, &worker, &idxLevel, &outsideInteractions);
int intervalSize;
starpu_codelet_unpack_args(cl_arg, &worker, &idxLevel, &outsideInteractions, &intervalSize);
CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CPU_IDX)->kernels[starpu_worker_get_id()];
......@@ -141,7 +146,8 @@ public:
STARPU_VARIABLE_GET_ELEMSIZE(buffers[2]),
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[3]),
idxLevel, outsideInteractions->data(), outsideInteractions->size(), kernel,
starpu_cuda_get_local_stream());
starpu_cuda_get_local_stream(),
FCuda__GetGridSize(kernel,intervalSize),FCuda__GetBlockSize(kernel));
}
#endif
/////////////////////////////////////////////////////////////////////////////////////
......@@ -151,7 +157,8 @@ public:
static void transferInPassCallback(void *buffers[], void *cl_arg){
FStarPUPtrInterface* worker = nullptr;
int idxLevel = 0;
starpu_codelet_unpack_args(cl_arg, &worker, &idxLevel);
int intervalSize;
starpu_codelet_unpack_args(cl_arg, &worker, &idxLevel, &intervalSize);
CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CPU_IDX)->kernels[starpu_worker_get_id()];
......@@ -161,14 +168,16 @@ public:
STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]),
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[2]),
idxLevel, kernel, starpu_cuda_get_local_stream());
idxLevel, kernel, starpu_cuda_get_local_stream(),
FCuda__GetGridSize(kernel,intervalSize),FCuda__GetBlockSize(kernel));
}
static void transferInoutPassCallback(void *buffers[], void *cl_arg){
FStarPUPtrInterface* worker = nullptr;
int idxLevel = 0;
const std::vector<OutOfBlockInteraction>* outsideInteractions;
starpu_codelet_unpack_args(cl_arg, &worker, &idxLevel, &outsideInteractions);
int intervalSize;
starpu_codelet_unpack_args(cl_arg, &worker, &idxLevel, &outsideInteractions, &intervalSize);
CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CPU_IDX)->kernels[starpu_worker_get_id()];
......@@ -183,7 +192,8 @@ public:
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[4]),
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[5]),
idxLevel, outsideInteractions->data(), outsideInteractions->size(), kernel,
starpu_cuda_get_local_stream());
starpu_cuda_get_local_stream(),
FCuda__GetGridSize(kernel,intervalSize),FCuda__GetBlockSize(kernel));
}
/////////////////////////////////////////////////////////////////////////////////////
......@@ -193,7 +203,8 @@ public:
FStarPUPtrInterface* worker = nullptr;
int nbSubCellGroups = 0;
int idxLevel = 0;
starpu_codelet_unpack_args(cl_arg, &worker, &nbSubCellGroups, &idxLevel);
int intervalSize;
starpu_codelet_unpack_args(cl_arg, &worker, &nbSubCellGroups, &idxLevel, &intervalSize);
FCudaParams<unsigned char*,9> subCellGroupsPtr;
memset(&subCellGroupsPtr, 0, sizeof(subCellGroupsPtr));
......@@ -215,7 +226,8 @@ public:
STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]),
subCellGroupsPtr,subCellGroupsSize,subCellGroupsDownPtr,
nbSubCellGroups, idxLevel, kernel, starpu_cuda_get_local_stream());
nbSubCellGroups, idxLevel, kernel, starpu_cuda_get_local_stream(),
FCuda__GetGridSize(kernel,intervalSize),FCuda__GetBlockSize(kernel));
}
/////////////////////////////////////////////////////////////////////////////////////
/// Direct Pass MPI
......@@ -226,7 +238,8 @@ public:
FStarPUPtrInterface* worker = nullptr;
const std::vector<OutOfBlockInteraction>* outsideInteractions = nullptr;
starpu_codelet_unpack_args(cl_arg, &worker, &outsideInteractions);
int intervalSize;
starpu_codelet_unpack_args(cl_arg, &worker, &outsideInteractions, &intervalSize);
CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CPU_IDX)->kernels[starpu_worker_get_id()];
......@@ -238,7 +251,8 @@ public:
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[2]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[2]),
outsideInteractions->data(), outsideInteractions->size(),
worker->get<ThisClass>(FSTARPU_CPU_IDX)->treeHeight ,kernel, starpu_cuda_get_local_stream());
worker->get<ThisClass>(FSTARPU_CPU_IDX)->treeHeight ,kernel, starpu_cuda_get_local_stream(),
FCuda__GetGridSize(kernel,intervalSize),FCuda__GetBlockSize(kernel));
}
#endif
/////////////////////////////////////////////////////////////////////////////////////
......@@ -247,7 +261,8 @@ public:
static void directInPassCallback(void *buffers[], void *cl_arg){
FStarPUPtrInterface* worker = nullptr;
starpu_codelet_unpack_args(cl_arg, &worker);
int intervalSize;
starpu_codelet_unpack_args(cl_arg, &worker, &intervalSize);
CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CPU_IDX)->kernels[starpu_worker_get_id()];
FCuda__directInPassCallback< SymboleCellClass, PoleCellClass, LocalCellClass,
......@@ -255,13 +270,15 @@ public:
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]),
worker->get<ThisClass>(FSTARPU_CPU_IDX)->treeHeight, kernel, starpu_cuda_get_local_stream());
worker->get<ThisClass>(FSTARPU_CPU_IDX)->treeHeight, kernel, starpu_cuda_get_local_stream(),
FCuda__GetGridSize(kernel,intervalSize),FCuda__GetBlockSize(kernel));
}
static void directInoutPassCallback(void *buffers[], void *cl_arg){
FStarPUPtrInterface* worker = nullptr;
const std::vector<OutOfBlockInteraction>* outsideInteractions = nullptr;
starpu_codelet_unpack_args(cl_arg, &worker, &outsideInteractions);
int intervalSize;
starpu_codelet_unpack_args(cl_arg, &worker, &outsideInteractions, &intervalSize);
CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CPU_IDX)->kernels[starpu_worker_get_id()];
......@@ -274,7 +291,8 @@ public:
STARPU_VARIABLE_GET_ELEMSIZE(buffers[2]),
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[3]),
outsideInteractions->data(), outsideInteractions->size(), worker->get<ThisClass>(FSTARPU_CPU_IDX)->treeHeight,
kernel, starpu_cuda_get_local_stream());
kernel, starpu_cuda_get_local_stream(),
FCuda__GetGridSize(kernel,intervalSize),FCuda__GetBlockSize(kernel));
}
......@@ -284,7 +302,8 @@ public:
static void mergePassCallback(void *buffers[], void *cl_arg){
FStarPUPtrInterface* worker = nullptr;
starpu_codelet_unpack_args(cl_arg, &worker);
int intervalSize;
starpu_codelet_unpack_args(cl_arg, &worker, &intervalSize);
CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CPU_IDX)->kernels[starpu_worker_get_id()];
......@@ -296,7 +315,8 @@ public:
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[2]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[2]),
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[3]),
kernel, starpu_cuda_get_local_stream());
kernel, starpu_cuda_get_local_stream(),
FCuda__GetGridSize(kernel,intervalSize),FCuda__GetBlockSize(kernel));
}
};
......
......@@ -77,10 +77,12 @@ public:
size_t containersSize = STARPU_VARIABLE_GET_ELEMSIZE(buffers[2]);
FStarPUPtrInterface* worker = nullptr;
starpu_codelet_unpack_args(cl_arg, &worker);
int intervalSize;
starpu_codelet_unpack_args(cl_arg, &worker, &intervalSize);
OpenCLKernelClass* kernel = worker->get<ThisClass>(FSTARPU_OPENCL_IDX)->kernels[starpu_worker_get_id()];
kernel->bottomPassPerform(leafCellsPtr, leafCellsSize, leafCellsUpPtr, containersPtr, containersSize);
kernel->bottomPassPerform(leafCellsPtr, leafCellsSize, leafCellsUpPtr, containersPtr, containersSize,
intervalSize);
}