Commit 16836b35 authored by PIACIBELLO Cyrille
Browse files
parents d70b6354 f43f7b03
......@@ -21,7 +21,16 @@ extern "C"{
#include <starpu.h>
}
#ifdef STARPU_USE_CPU
#include "FStarPUCpuWrapper.hpp"
#endif
#ifdef STARPU_USE_CUDA
#include "FStarPUCudaWrapper.hpp"
#endif
#ifdef STARPU_USE_OPENCL
#include "FStarPUOpenClWrapper.hpp"
#endif
#include "FStarPUUtils.hpp"
template <class OctreeClass, class CellContainerClass, class CellClass, class KernelClass, class ParticleGroupClass, class ParticleContainerClass>
......@@ -51,6 +60,7 @@ protected:
int MaxThreads; //< The number of threads
OctreeClass*const tree; //< The Tree
KernelClass*const originalCpuKernel;
std::vector<starpu_data_handle_t>* handles_up;
std::vector<starpu_data_handle_t>* handles_down;
......@@ -66,15 +76,36 @@ protected:
starpu_codelet p2p_cl_in;
starpu_codelet p2p_cl_inout;
#ifdef STARPU_USE_CPU
typedef FStarPUCpuWrapper<CellContainerClass, CellClass, KernelClass, ParticleGroupClass, ParticleContainerClass> StarPUCpuWrapperClass;
StarPUCpuWrapperClass cpuWrapper;
StarPUCpuWrapperClass* wrapperptr;
#endif
#ifdef STARPU_USE_CUDA
typedef FStarPUCudaWrapper<CellContainerClass, CellClass, KernelClass, ParticleGroupClass, ParticleContainerClass> StarPUCudaWrapperClass;
StarPUCudaWrapperClass cudaWrapper;
#endif
#ifdef STARPU_USE_OPENCL
typedef FStarPUOpenClWrapper<CellContainerClass, CellClass, KernelClass, ParticleGroupClass, ParticleContainerClass> StarPUOpenClWrapperClass;
StarPUOpenClWrapperClass openclWrapper;
#endif
FStarPUPtrInterface wrappers;
FStarPUPtrInterface* wrapperptr;
public:
FGroupTaskStarPUAlgorithm(OctreeClass*const inTree, KernelClass* inKernels, const int inMaxThreads = -1)
: MaxThreads(inMaxThreads), tree(inTree),
: MaxThreads(inMaxThreads), tree(inTree), originalCpuKernel(inKernels),
handles_up(nullptr), handles_down(nullptr),
cpuWrapper(tree->getHeight()), wrapperptr(&cpuWrapper){
#ifdef STARPU_USE_CPU
cpuWrapper(tree->getHeight()),
#endif
#ifdef STARPU_USE_CUDA
cudaWrapper(tree->getHeight()),
#endif
#ifdef STARPU_USE_OPENCL
openclWrapper(tree->getHeight()),
#endif
wrapperptr(&wrappers){
FAssertLF(tree, "tree cannot be null");
FAssertLF(inKernels, "kernels cannot be null");
FAssertLF(MaxThreads <= STARPU_MAXCPUS, "number of threads to high");
......@@ -86,11 +117,30 @@ public:
starpu_pthread_mutex_t initMutex;
starpu_pthread_mutex_init(&initMutex, NULL);
#ifdef STARPU_USE_CPU
FStarPUUtils::ExecOnWorkers(STARPU_CPU, [&](){
starpu_pthread_mutex_lock(&initMutex);
cpuWrapper.initKernel(starpu_worker_get_id(), inKernels);
starpu_pthread_mutex_unlock(&initMutex);
});
wrappers.set(FSTARPU_CPU_IDX, &cpuWrapper);
#endif
#ifdef STARPU_USE_CUDA
FStarPUUtils::ExecOnWorkers(STARPU_CUDA, [&](){
starpu_pthread_mutex_lock(&initMutex);
cudaWrapper.initKernel(starpu_worker_get_id(), inKernels);
starpu_pthread_mutex_unlock(&initMutex);
});
wrappers.set(FSTARPU_CUDA_IDX, &cudaWrapper);
#endif
#ifdef STARPU_USE_OPENCL
FStarPUUtils::ExecOnWorkers(STARPU_OPENCL, [&](){
starpu_pthread_mutex_lock(&initMutex);
openclWrapper.initKernel(starpu_worker_get_id(), inKernels);
starpu_pthread_mutex_unlock(&initMutex);
});
wrappers.set(FSTARPU_OPENCL_IDX, &openclWrapper);
#endif
starpu_pthread_mutex_destroy(&initMutex);
starpu_pause();
......@@ -125,8 +175,6 @@ public:
starpu_resume();
if( operationsToProceed & FFmmP2P ) directPass();
if(operationsToProceed & FFmmP2M) bottomPass();
if(operationsToProceed & FFmmM2M) upwardPass();
......@@ -135,6 +183,8 @@ public:
if(operationsToProceed & FFmmL2L) downardPass();
if( operationsToProceed & FFmmP2P ) directPass();
if( operationsToProceed & FFmmL2P ) mergePass();
starpu_task_wait_for_all();
......@@ -144,8 +194,24 @@ public:
protected:
void initCodelet(){
memset(&p2m_cl, 0, sizeof(p2m_cl));
p2m_cl.where = STARPU_CPU;
p2m_cl.cpu_funcs[0] = StarPUCpuWrapperClass::bottomPassCallback;
#ifdef STARPU_USE_CPU
if(originalCpuKernel->supportP2M()){
p2m_cl.cpu_funcs[0] = StarPUCpuWrapperClass::bottomPassCallback;
p2m_cl.where |= STARPU_CPU;
}
#endif
#ifdef STARPU_USE_CUDA
if(originalCpuKernel->supportP2M()){
p2m_cl.cuda_funcs[0] = StarPUCudaWrapperClass::bottomPassCallback;
p2m_cl.where |= STARPU_CUDA;
}
#endif
#ifdef STARPU_USE_OPENCL
if(originalCpuKernel->supportP2M()){
p2m_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::bottomPassCallback;
p2m_cl.where |= STARPU_OPENCL;
}
#endif
p2m_cl.nbuffers = 2;
p2m_cl.modes[0] = STARPU_RW;
p2m_cl.modes[1] = STARPU_R;
......@@ -154,15 +220,47 @@ protected:
memset(m2m_cl, 0, sizeof(m2m_cl[0])*9);
memset(l2l_cl, 0, sizeof(l2l_cl[0])*9);
for(int idx = 0 ; idx < 9 ; ++idx){
m2m_cl[idx].where = STARPU_CPU;
m2m_cl[idx].cpu_funcs[0] = StarPUCpuWrapperClass::upwardPassCallback;
#ifdef STARPU_USE_CPU
if(originalCpuKernel->supportM2M()){
m2m_cl[idx].cpu_funcs[0] = StarPUCpuWrapperClass::upwardPassCallback;
m2m_cl[idx].where |= STARPU_CPU;
}
#endif
#ifdef STARPU_USE_CUDA
if(originalCpuKernel->supportM2M()){
m2m_cl[idx].cuda_funcs[0] = StarPUCudaWrapperClass::upwardPassCallback;
m2m_cl[idx].where |= STARPU_CUDA;
}
#endif
#ifdef STARPU_USE_OPENCL
if(originalCpuKernel->supportM2M()){
m2m_cl[idx].opencl_funcs[0] = StarPUOpenClWrapperClass::upwardPassCallback;
m2m_cl[idx].where |= STARPU_OPENCL;
}
#endif
m2m_cl[idx].nbuffers = idx+2;
m2m_cl[idx].dyn_modes = (starpu_data_access_mode*)malloc((idx+2)*sizeof(starpu_data_access_mode));
m2m_cl[idx].dyn_modes[0] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
m2m_cl[idx].dyn_modes[0] = STARPU_RW;
m2m_cl[idx].name = "m2m_cl";
l2l_cl[idx].where = STARPU_CPU;
l2l_cl[idx].cpu_funcs[0] = StarPUCpuWrapperClass::downardPassCallback;
#ifdef STARPU_USE_CPU
if(originalCpuKernel->supportL2L()){
l2l_cl[idx].cpu_funcs[0] = StarPUCpuWrapperClass::downardPassCallback;
l2l_cl[idx].where |= STARPU_CPU;
}
#endif
#ifdef STARPU_USE_CUDA
if(originalCpuKernel->supportL2L()){
l2l_cl[idx].cuda_funcs[0] = StarPUCudaWrapperClass::downardPassCallback;
l2l_cl[idx].where |= STARPU_CUDA;
}
#endif
#ifdef STARPU_USE_OPENCL
if(originalCpuKernel->supportL2L()){
l2l_cl[idx].opencl_funcs[0] = StarPUOpenClWrapperClass::downardPassCallback;
l2l_cl[idx].where |= STARPU_OPENCL;
}
#endif
l2l_cl[idx].nbuffers = idx+2;
l2l_cl[idx].dyn_modes = (starpu_data_access_mode*)malloc((idx+2)*sizeof(starpu_data_access_mode));
l2l_cl[idx].dyn_modes[0] = STARPU_R;
......@@ -175,37 +273,117 @@ protected:
}
memset(&l2p_cl, 0, sizeof(l2p_cl));
l2p_cl.where = STARPU_CPU;
l2p_cl.cpu_funcs[0] = StarPUCpuWrapperClass::mergePassCallback;
#ifdef STARPU_USE_CPU
if(originalCpuKernel->supportL2P()){
l2p_cl.cpu_funcs[0] = StarPUCpuWrapperClass::mergePassCallback;
l2p_cl.where |= STARPU_CPU;
}
#endif
#ifdef STARPU_USE_CUDA
if(originalCpuKernel->supportL2P()){
l2p_cl.cuda_funcs[0] = StarPUCudaWrapperClass::mergePassCallback;
l2p_cl.where |= STARPU_CUDA;
}
#endif
#ifdef STARPU_USE_OPENCL
if(originalCpuKernel->supportL2P()){
l2p_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::mergePassCallback;
l2p_cl.where |= STARPU_OPENCL;
}
#endif
l2p_cl.nbuffers = 2;
l2p_cl.modes[0] = STARPU_R;
l2p_cl.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
l2p_cl.name = "l2p_cl";
memset(&p2p_cl_in, 0, sizeof(p2p_cl_in));
p2p_cl_in.where = STARPU_CPU;
p2p_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::directInPassCallback;
#ifdef STARPU_USE_CPU
if(originalCpuKernel->supportP2P()){
p2p_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::directInPassCallback;
p2p_cl_in.where |= STARPU_CPU;
}
#endif
#ifdef STARPU_USE_CUDA
if(originalCpuKernel->supportP2P()){
p2p_cl_in.cuda_funcs[0] = StarPUCudaWrapperClass::directInPassCallback;
p2p_cl_in.where |= STARPU_CUDA;
}
#endif
#ifdef STARPU_USE_OPENCL
if(originalCpuKernel->supportP2P()){
p2p_cl_in.opencl_funcs[0] = StarPUOpenClWrapperClass::directInPassCallback;
p2p_cl_in.where |= STARPU_OPENCL;
}
#endif
p2p_cl_in.nbuffers = 1;
p2p_cl_in.modes[0] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
p2p_cl_in.name = "p2p_cl_in";
memset(&p2p_cl_inout, 0, sizeof(p2p_cl_inout));
p2p_cl_inout.where = STARPU_CPU;
p2p_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::directInoutPassCallback;
#ifdef STARPU_USE_CPU
if(originalCpuKernel->supportP2P()){
p2p_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::directInoutPassCallback;
p2p_cl_inout.where |= STARPU_CPU;
}
#endif
#ifdef STARPU_USE_CUDA
if(originalCpuKernel->supportP2P()){
p2p_cl_inout.cuda_funcs[0] = StarPUCudaWrapperClass::directInoutPassCallback;
p2p_cl_inout.where |= STARPU_CUDA;
}
#endif
#ifdef STARPU_USE_OPENCL
if(originalCpuKernel->supportP2P()){
p2p_cl_inout.opencl_funcs[0] = StarPUOpenClWrapperClass::directInoutPassCallback;
p2p_cl_inout.where |= STARPU_OPENCL;
}
#endif
p2p_cl_inout.nbuffers = 2;
p2p_cl_inout.modes[0] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
p2p_cl_inout.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
p2p_cl_inout.name = "p2p_cl_inout";
memset(&m2l_cl_in, 0, sizeof(m2l_cl_in));
m2l_cl_in.where = STARPU_CPU;
m2l_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::transferInPassCallback;
#ifdef STARPU_USE_CPU
if(originalCpuKernel->supportM2L()){
m2l_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::transferInPassCallback;
m2l_cl_in.where |= STARPU_CPU;
}
#endif
#ifdef STARPU_USE_CUDA
if(originalCpuKernel->supportM2L()){
m2l_cl_in.cuda_funcs[0] = StarPUCudaWrapperClass::transferInPassCallback;
m2l_cl_in.where |= STARPU_CUDA;
}
#endif
#ifdef STARPU_USE_OPENCL
if(originalCpuKernel->supportM2L()){
m2l_cl_in.opencl_funcs[0] = StarPUOpenClWrapperClass::transferInPassCallback;
m2l_cl_in.where |= STARPU_OPENCL;
}
#endif
m2l_cl_in.nbuffers = 2;
m2l_cl_in.modes[0] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
m2l_cl_in.modes[1] = STARPU_R;
m2l_cl_in.name = "m2l_cl_in";
memset(&m2l_cl_inout, 0, sizeof(m2l_cl_inout));
m2l_cl_inout.where = STARPU_CPU;
m2l_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::transferInoutPassCallback;
#ifdef STARPU_USE_CPU
if(originalCpuKernel->supportM2L()){
m2l_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::transferInoutPassCallback;
m2l_cl_inout.where |= STARPU_CPU;
}
#endif
#ifdef STARPU_USE_CUDA
if(originalCpuKernel->supportM2L()){
m2l_cl_inout.cuda_funcs[0] = StarPUCudaWrapperClass::transferInoutPassCallback;
m2l_cl_inout.where |= STARPU_CUDA;
}
#endif
#ifdef STARPU_USE_OPENCL
if(originalCpuKernel->supportM2L()){
m2l_cl_inout.opencl_funcs[0] = StarPUOpenClWrapperClass::transferInoutPassCallback;
m2l_cl_inout.where |= STARPU_OPENCL;
}
#endif
m2l_cl_inout.nbuffers = 4;
m2l_cl_inout.modes[0] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
m2l_cl_inout.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
......@@ -456,7 +634,7 @@ protected:
for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
starpu_insert_task(&p2m_cl,
STARPU_VALUE, &wrapperptr, sizeof(StarPUCpuWrapperClass*),
STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
STARPU_RW, handles_up[tree->getHeight()-1][idxGroup],
STARPU_R, handles_up[tree->getHeight()][idxGroup],
0);
......@@ -487,13 +665,15 @@ protected:
FAssertLF( idxSubGroup != tree->getNbCellGroupAtLevel(idxLevel+1) );
FAssertLF( (tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex()>>3) == currentCells->getStartingIndex() );
}
// Copy at most 8 groups
int nbSubCellGroups = 0;
task->dyn_handles[nbSubCellGroups + 1] = handles_up[idxLevel+1][idxSubGroup];
nbSubCellGroups += 1;
while(tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex() <= ((currentCells->getEndingIndex()<<3)+7)
while(tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex() <= (((currentCells->getEndingIndex()-1)<<3)+7)
&& (idxSubGroup+1) != tree->getNbCellGroupAtLevel(idxLevel+1)
&& tree->getCellGroup(idxLevel+1, idxSubGroup+1)->getStartingIndex() <= (currentCells->getEndingIndex()<<3)+7 ){
&& tree->getCellGroup(idxLevel+1, idxSubGroup+1)->getStartingIndex() <= ((currentCells->getEndingIndex()-1)<<3)+7 ){
idxSubGroup += 1;
task->dyn_handles[nbSubCellGroups + 1] = handles_up[idxLevel+1][idxSubGroup];
nbSubCellGroups += 1;
......@@ -506,7 +686,7 @@ protected:
char *arg_buffer;
size_t arg_buffer_size;
starpu_codelet_pack_args((void**)&arg_buffer, &arg_buffer_size,
STARPU_VALUE, &wrapperptr, sizeof(StarPUCpuWrapperClass*),
STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
STARPU_VALUE, &nbSubCellGroups, sizeof(nbSubCellGroups),
STARPU_VALUE, &idxLevel, sizeof(idxLevel),
0);
......@@ -529,7 +709,7 @@ protected:
FLOG( timerInBlock.tic() );
for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
starpu_insert_task(&m2l_cl_in,
STARPU_VALUE, &wrapperptr, sizeof(StarPUCpuWrapperClass*),
STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
STARPU_VALUE, &idxLevel, sizeof(idxLevel),
(STARPU_RW|STARPU_COMMUTE), handles_down[idxLevel][idxGroup],
STARPU_R, handles_up[idxLevel][idxGroup],
......@@ -544,7 +724,7 @@ protected:
const std::vector<OutOfBlockInteraction>* outsideInteractions = &externalInteractionsAllLevel[idxLevel][idxGroup][idxInteraction].interactions;
starpu_insert_task(&m2l_cl_inout,
STARPU_VALUE, &wrapperptr, sizeof(StarPUCpuWrapperClass*),
STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
STARPU_VALUE, &idxLevel, sizeof(idxLevel),
STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions),
(STARPU_RW|STARPU_COMMUTE), handles_down[idxLevel][idxGroup],
......@@ -587,9 +767,9 @@ protected:
int nbSubCellGroups = 0;
task->dyn_handles[nbSubCellGroups + 1] = handles_down[idxLevel+1][idxSubGroup];
nbSubCellGroups += 1;
while(tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex() <= ((currentCells->getEndingIndex()<<3)+7)
while(tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex() <= (((currentCells->getEndingIndex()-1)<<3)+7)
&& (idxSubGroup+1) != tree->getNbCellGroupAtLevel(idxLevel+1)
&& tree->getCellGroup(idxLevel+1, idxSubGroup+1)->getStartingIndex() <= (currentCells->getEndingIndex()<<3)+7 ){
&& tree->getCellGroup(idxLevel+1, idxSubGroup+1)->getStartingIndex() <= ((currentCells->getEndingIndex()-1)<<3)+7 ){
idxSubGroup += 1;
task->dyn_handles[nbSubCellGroups + 1] = handles_down[idxLevel+1][idxSubGroup];
nbSubCellGroups += 1;
......@@ -602,7 +782,7 @@ protected:
char *arg_buffer;
size_t arg_buffer_size;
starpu_codelet_pack_args((void**)&arg_buffer, &arg_buffer_size,
STARPU_VALUE, &wrapperptr, sizeof(StarPUCpuWrapperClass*),
STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
STARPU_VALUE, &nbSubCellGroups, sizeof(nbSubCellGroups),
STARPU_VALUE, &idxLevel, sizeof(idxLevel),
0);
......@@ -625,7 +805,7 @@ protected:
FLOG( timerInBlock.tic() );
for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
starpu_insert_task(&p2p_cl_in,
STARPU_VALUE, &wrapperptr, sizeof(StarPUCpuWrapperClass*),
STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
(STARPU_RW|STARPU_COMMUTE), handles_down[tree->getHeight()][idxGroup],
0);
}
......@@ -636,7 +816,7 @@ protected:
const int interactionid = externalInteractionsLeafLevel[idxGroup][idxInteraction].otherBlockId;
const std::vector<OutOfBlockInteraction>* outsideInteractions = &externalInteractionsLeafLevel[idxGroup][idxInteraction].interactions;
starpu_insert_task(&p2p_cl_inout,
STARPU_VALUE, &wrapperptr, sizeof(StarPUCpuWrapperClass*),
STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions),
(STARPU_RW|STARPU_COMMUTE), handles_down[tree->getHeight()][idxGroup],
(STARPU_RW|STARPU_COMMUTE), handles_down[tree->getHeight()][interactionid],
......@@ -658,7 +838,7 @@ protected:
for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
starpu_insert_task(&l2p_cl,
STARPU_VALUE, &wrapperptr, sizeof(StarPUCpuWrapperClass*),
STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
STARPU_R, handles_down[tree->getHeight()-1][idxGroup],
(STARPU_RW|STARPU_COMMUTE), handles_down[tree->getHeight()][idxGroup],
0);
......
// @SCALFMM_PRIVATE
#ifndef FSTARPUCPUWRAPPER_HPP
#define FSTARPUCPUWRAPPER_HPP
......@@ -31,6 +33,7 @@ extern "C"{
}
#endif
#include "FStarPUUtils.hpp"
template <class CellContainerClass, class CellClass, class KernelClass,
class ParticleGroupClass, class ParticleContainerClass>
......@@ -80,9 +83,9 @@ public:
ParticleGroupClass containers((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[1]));
ThisClass* worker = nullptr;
FStarPUPtrInterface* worker = nullptr;
starpu_codelet_unpack_args(cl_arg, &worker);
worker->bottomPassPerform(&leafCells, &containers);
worker->get<ThisClass>(FSTARPU_CPU_IDX)->bottomPassPerform(&leafCells, &containers);
}
void bottomPassPerform(CellContainerClass* leafCells, ParticleGroupClass* containers){
......@@ -109,7 +112,7 @@ public:
CellContainerClass currentCells((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]));
ThisClass* worker = nullptr;
FStarPUPtrInterface* worker = nullptr;
int nbSubCellGroups = 0;
int idxLevel = 0;
starpu_codelet_unpack_args(cl_arg, &worker, &nbSubCellGroups, &idxLevel);
......@@ -121,7 +124,7 @@ public:
STARPU_VARIABLE_GET_ELEMSIZE(buffers[idxSubGroup+1]));
}
worker->upwardPassPerform(&currentCells, subCellGroups, nbSubCellGroups, idxLevel);
worker->get<ThisClass>(FSTARPU_CPU_IDX)->upwardPassPerform(&currentCells, subCellGroups, nbSubCellGroups, idxLevel);
for(int idxSubGroup = 0; idxSubGroup < nbSubCellGroups ; ++idxSubGroup){
delete subCellGroups[idxSubGroup];
......@@ -172,12 +175,12 @@ public:
CellContainerClass externalCells((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[1]));
ThisClass* worker = nullptr;
FStarPUPtrInterface* worker = nullptr;
int idxLevel = 0;
const std::vector<OutOfBlockInteraction>* outsideInteractions;
starpu_codelet_unpack_args(cl_arg, &worker, &idxLevel, &outsideInteractions);
worker->transferInoutPassPerformMpi(&currentCells, &externalCells, idxLevel, outsideInteractions);
worker->get<ThisClass>(FSTARPU_CPU_IDX)->transferInoutPassPerformMpi(&currentCells, &externalCells, idxLevel, outsideInteractions);
}
......@@ -213,11 +216,11 @@ public:
CellContainerClass currentCells((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]));
ThisClass* worker = nullptr;
FStarPUPtrInterface* worker = nullptr;
int idxLevel = 0;
starpu_codelet_unpack_args(cl_arg, &worker, &idxLevel);
worker->transferInPassPerform(&currentCells, idxLevel);
worker->get<ThisClass>(FSTARPU_CPU_IDX)->transferInPassPerform(&currentCells, idxLevel);
}
void transferInPassPerform(CellContainerClass*const currentCells, const int idxLevel){
......@@ -264,12 +267,12 @@ public:
CellContainerClass externalCells((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[1]));
ThisClass* worker = nullptr;
FStarPUPtrInterface* worker = nullptr;
int idxLevel = 0;
const std::vector<OutOfBlockInteraction>* outsideInteractions;
starpu_codelet_unpack_args(cl_arg, &worker, &idxLevel, &outsideInteractions);
worker->transferInoutPassPerform(&currentCells, &externalCells, idxLevel, outsideInteractions);
worker->get<ThisClass>(FSTARPU_CPU_IDX)->transferInoutPassPerform(&currentCells, &externalCells, idxLevel, outsideInteractions);
}
......@@ -307,7 +310,7 @@ public:
CellContainerClass currentCells((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]));
ThisClass* worker = nullptr;
FStarPUPtrInterface* worker = nullptr;
int nbSubCellGroups = 0;
int idxLevel = 0;
starpu_codelet_unpack_args(cl_arg, &worker, &nbSubCellGroups, &idxLevel);
......@@ -319,7 +322,7 @@ public:
STARPU_VARIABLE_GET_ELEMSIZE(buffers[idxSubGroup+1]));
}
worker->downardPassPerform(&currentCells, subCellGroups, nbSubCellGroups, idxLevel);
worker->get<ThisClass>(FSTARPU_CPU_IDX)->downardPassPerform(&currentCells, subCellGroups, nbSubCellGroups, idxLevel);
for(int idxSubGroup = 0; idxSubGroup < nbSubCellGroups ; ++idxSubGroup){
delete subCellGroups[idxSubGroup];
......@@ -370,11 +373,11 @@ public:
ParticleGroupClass externalContainers((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[1]));
ThisClass* worker = nullptr;
FStarPUPtrInterface* worker = nullptr;
const std::vector<OutOfBlockInteraction>* outsideInteractions = nullptr;
starpu_codelet_unpack_args(cl_arg, &worker, &outsideInteractions);
worker->directInoutPassPerform(&containers, &externalContainers, outsideInteractions);
worker->get<ThisClass>(FSTARPU_CPU_IDX)->directInoutPassPerform(&containers, &externalContainers, outsideInteractions);
}
void directInoutPassPerformMpi(ParticleGroupClass* containers, ParticleGroupClass* containersOther,
......@@ -402,10 +405,9 @@ public:
ParticleGroupClass containers((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]));
ThisClass* worker = nullptr;
FStarPUPtrInterface* worker = nullptr;
starpu_codelet_unpack_args(cl_arg, &worker);
worker->directInPassPerform(&containers);
worker->get<ThisClass>(FSTARPU_CPU_IDX)->directInPassPerform(&containers);
}
void directInPassPerform(ParticleGroupClass* containers){
......@@ -448,11 +450,11 @@ public:
ParticleGroupClass externalContainers((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[1]));
ThisClass* worker = nullptr;
FStarPUPtrInterface* worker = nullptr;
const std::vector<OutOfBlockInteraction>* outsideInteractions = nullptr;
starpu_codelet_unpack_args(cl_arg, &worker, &outsideInteractions);
worker->directInoutPassPerform(&containers, &externalContainers, outsideInteractions);
worker->get<ThisClass>(FSTARPU_CPU_IDX)->directInoutPassPerform(&containers, &externalContainers, outsideInteractions);
}
void directInoutPassPerform(ParticleGroupClass* containers, ParticleGroupClass* containersOther,
......@@ -486,9 +488,9 @@ public:
ParticleGroupClass containers((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[1]));
ThisClass* worker = nullptr;
FStarPUPtrInterface* worker = nullptr;
starpu_codelet_unpack_args(cl_arg, &worker);
worker->mergePassPerform(&leafCells, &containers);
worker->get<ThisClass>(FSTARPU_CPU_IDX)->mergePassPerform(&leafCells, &containers);
}
void mergePassPerform(CellContainerClass* leafCells, ParticleGroupClass* containers){
......
This diff is collapsed.
// @SCALFMM_PRIVATE
#ifndef FSTARPUKERNELCAPACITIES_HPP
#define FSTARPUKERNELCAPACITIES_HPP
/** A kernel class used with the StarPU algorithm should
* implement this interface to tell the algorithm which
* operators (e.g. P2M, M2M, M2L, L2L, L2P) the kernel supports.
*/
class FStarPUKernelCapacities {
public:
virtual bool supportP2M() const = 0;
virtual bool supportM2M() const = 0;
virtual bool supportM2L() const = 0;
virtual bool supportL2L() const = 0;
virtual bool supportL2P() const = 0;