Commit 8cda28fc authored by BRAMAS Berenger's avatar BRAMAS Berenger

Prepare OpenCl and Cuda wrapper

parent 7116ae84
......@@ -21,7 +21,16 @@ extern "C"{
#include <starpu.h>
}
#ifdef STARPU_USE_CPU
#include "FStarPUCpuWrapper.hpp"
#endif
#ifdef STARPU_USE_CUDA
#include "FStarPUCudaWrapper.hpp"
#endif
#ifdef STARPU_USE_OPENCL
#include "FStarPUOpenClWrapper.hpp"
#endif
#include "FStarPUUtils.hpp"
template <class OctreeClass, class CellContainerClass, class CellClass, class KernelClass, class ParticleGroupClass, class ParticleContainerClass>
......@@ -71,6 +80,14 @@ protected:
typedef FStarPUCpuWrapper<CellContainerClass, CellClass, KernelClass, ParticleGroupClass, ParticleContainerClass> StarPUCpuWrapperClass;
StarPUCpuWrapperClass cpuWrapper;
#endif
#ifdef STARPU_USE_CUDA
typedef FStarPUCudaWrapper<CellContainerClass, CellClass, KernelClass, ParticleGroupClass, ParticleContainerClass> StarPUCudaWrapperClass;
StarPUCudaWrapperClass cudaWrapper;
#endif
#ifdef STARPU_USE_OPENCL
typedef FStarPUOpenClWrapper<CellContainerClass, CellClass, KernelClass, ParticleGroupClass, ParticleContainerClass> StarPUOpenClWrapperClass;
StarPUOpenClWrapperClass openclWrapper;
#endif
FStarPUPtrInterface wrappers;
FStarPUPtrInterface* wrapperptr;
......@@ -81,6 +98,12 @@ public:
handles_up(nullptr), handles_down(nullptr),
#ifdef STARPU_USE_CPU
cpuWrapper(tree->getHeight()),
#endif
#ifdef STARPU_USE_CUDA
cudaWrapper(tree->getHeight()),
#endif
#ifdef STARPU_USE_OPENCL
openclWrapper(tree->getHeight()),
#endif
wrapperptr(&wrappers){
FAssertLF(tree, "tree cannot be null");
......@@ -101,6 +124,22 @@ public:
starpu_pthread_mutex_unlock(&initMutex);
});
wrappers.set(FSTARPU_CPU_IDX, &cpuWrapper);
#endif
#ifdef STARPU_USE_CUDA
FStarPUUtils::ExecOnWorkers(STARPU_CUDA, [&](){
starpu_pthread_mutex_lock(&initMutex);
cudaWrapper.initKernel(starpu_worker_get_id(), inKernels);
starpu_pthread_mutex_unlock(&initMutex);
});
wrappers.set(FSTARPU_CUDA_IDX, &cudaWrapper);
#endif
#ifdef STARPU_USE_OPENCL
FStarPUUtils::ExecOnWorkers(STARPU_OPENCL, [&](){
starpu_pthread_mutex_lock(&initMutex);
openclWrapper.initKernel(starpu_worker_get_id(), inKernels);
starpu_pthread_mutex_unlock(&initMutex);
});
wrappers.set(FSTARPU_OPENCL_IDX, &openclWrapper);
#endif
starpu_pthread_mutex_destroy(&initMutex);
......@@ -160,6 +199,18 @@ protected:
p2m_cl.cpu_funcs[0] = StarPUCpuWrapperClass::bottomPassCallback;
p2m_cl.where |= STARPU_CPU;
}
#endif
#ifdef STARPU_USE_CUDA
if(originalCpuKernel->supportP2M()){
p2m_cl.cuda_funcs[0] = StarPUCudaWrapperClass::bottomPassCallback;
p2m_cl.where |= STARPU_CUDA;
}
#endif
#ifdef STARPU_USE_OPENCL
if(originalCpuKernel->supportP2M()){
p2m_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::bottomPassCallback;
p2m_cl.where |= STARPU_OPENCL;
}
#endif
p2m_cl.nbuffers = 2;
p2m_cl.modes[0] = STARPU_RW;
......@@ -174,6 +225,18 @@ protected:
m2m_cl[idx].cpu_funcs[0] = StarPUCpuWrapperClass::upwardPassCallback;
m2m_cl[idx].where |= STARPU_CPU;
}
#endif
#ifdef STARPU_USE_CUDA
if(originalCpuKernel->supportM2M()){
m2m_cl[idx].cuda_funcs[0] = StarPUCudaWrapperClass::upwardPassCallback;
m2m_cl[idx].where |= STARPU_CUDA;
}
#endif
#ifdef STARPU_USE_OPENCL
if(originalCpuKernel->supportM2M()){
m2m_cl[idx].opencl_funcs[0] = StarPUOpenClWrapperClass::upwardPassCallback;
m2m_cl[idx].where |= STARPU_OPENCL;
}
#endif
m2m_cl[idx].nbuffers = idx+2;
m2m_cl[idx].dyn_modes = (starpu_data_access_mode*)malloc((idx+2)*sizeof(starpu_data_access_mode));
......@@ -185,6 +248,18 @@ protected:
l2l_cl[idx].cpu_funcs[0] = StarPUCpuWrapperClass::downardPassCallback;
l2l_cl[idx].where |= STARPU_CPU;
}
#endif
#ifdef STARPU_USE_CUDA
if(originalCpuKernel->supportL2L()){
l2l_cl[idx].cuda_funcs[0] = StarPUCudaWrapperClass::downardPassCallback;
l2l_cl[idx].where |= STARPU_CUDA;
}
#endif
#ifdef STARPU_USE_OPENCL
if(originalCpuKernel->supportL2L()){
l2l_cl[idx].opencl_funcs[0] = StarPUOpenClWrapperClass::downardPassCallback;
l2l_cl[idx].where |= STARPU_OPENCL;
}
#endif
l2l_cl[idx].nbuffers = idx+2;
l2l_cl[idx].dyn_modes = (starpu_data_access_mode*)malloc((idx+2)*sizeof(starpu_data_access_mode));
......@@ -203,6 +278,18 @@ protected:
l2p_cl.cpu_funcs[0] = StarPUCpuWrapperClass::mergePassCallback;
l2p_cl.where |= STARPU_CPU;
}
#endif
#ifdef STARPU_USE_CUDA
if(originalCpuKernel->supportL2P()){
l2p_cl.cuda_funcs[0] = StarPUCudaWrapperClass::mergePassCallback;
l2p_cl.where |= STARPU_CUDA;
}
#endif
#ifdef STARPU_USE_OPENCL
if(originalCpuKernel->supportL2P()){
l2p_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::mergePassCallback;
l2p_cl.where |= STARPU_OPENCL;
}
#endif
l2p_cl.nbuffers = 2;
l2p_cl.modes[0] = STARPU_R;
......@@ -215,6 +302,18 @@ protected:
p2p_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::directInPassCallback;
p2p_cl_in.where |= STARPU_CPU;
}
#endif
#ifdef STARPU_USE_CUDA
if(originalCpuKernel->supportP2P()){
p2p_cl_in.cuda_funcs[0] = StarPUCudaWrapperClass::directInPassCallback;
p2p_cl_in.where |= STARPU_CUDA;
}
#endif
#ifdef STARPU_USE_OPENCL
if(originalCpuKernel->supportP2P()){
p2p_cl_in.opencl_funcs[0] = StarPUOpenClWrapperClass::directInPassCallback;
p2p_cl_in.where |= STARPU_OPENCL;
}
#endif
p2p_cl_in.nbuffers = 1;
p2p_cl_in.modes[0] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
......@@ -225,6 +324,18 @@ protected:
p2p_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::directInoutPassCallback;
p2p_cl_inout.where |= STARPU_CPU;
}
#endif
#ifdef STARPU_USE_CUDA
if(originalCpuKernel->supportP2P()){
p2p_cl_inout.cuda_funcs[0] = StarPUCudaWrapperClass::directInoutPassCallback;
p2p_cl_inout.where |= STARPU_CUDA;
}
#endif
#ifdef STARPU_USE_OPENCL
if(originalCpuKernel->supportP2P()){
p2p_cl_inout.opencl_funcs[0] = StarPUOpenClWrapperClass::directInoutPassCallback;
p2p_cl_inout.where |= STARPU_OPENCL;
}
#endif
p2p_cl_inout.nbuffers = 2;
p2p_cl_inout.modes[0] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
......@@ -237,6 +348,18 @@ protected:
m2l_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::transferInPassCallback;
m2l_cl_in.where |= STARPU_CPU;
}
#endif
#ifdef STARPU_USE_CUDA
if(originalCpuKernel->supportM2L()){
m2l_cl_in.cuda_funcs[0] = StarPUCudaWrapperClass::transferInPassCallback;
m2l_cl_in.where |= STARPU_CUDA;
}
#endif
#ifdef STARPU_USE_OPENCL
if(originalCpuKernel->supportM2L()){
m2l_cl_in.opencl_funcs[0] = StarPUOpenClWrapperClass::transferInPassCallback;
m2l_cl_in.where |= STARPU_OPENCL;
}
#endif
m2l_cl_in.nbuffers = 2;
m2l_cl_in.modes[0] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
......@@ -248,6 +371,18 @@ protected:
m2l_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::transferInoutPassCallback;
m2l_cl_inout.where |= STARPU_CPU;
}
#endif
#ifdef STARPU_USE_CUDA
if(originalCpuKernel->supportM2L()){
m2l_cl_inout.cuda_funcs[0] = StarPUCudaWrapperClass::transferInoutPassCallback;
m2l_cl_inout.where |= STARPU_CUDA;
}
#endif
#ifdef STARPU_USE_OPENCL
if(originalCpuKernel->supportM2L()){
m2l_cl_inout.opencl_funcs[0] = StarPUOpenClWrapperClass::transferInoutPassCallback;
m2l_cl_inout.where |= STARPU_OPENCL;
}
#endif
m2l_cl_inout.nbuffers = 4;
m2l_cl_inout.modes[0] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
......
......@@ -25,7 +25,15 @@ extern "C"{
#include <starpu_mpi.h>
}
#ifdef STARPU_USE_CPU
#include "FStarPUCpuWrapper.hpp"
#endif
#ifdef STARPU_USE_CUDA
#include "FStarPUCudaWrapper.hpp"
#endif
#ifdef STARPU_USE_OPENCL
#include "FStarPUOpenClWrapper.hpp"
#endif
#include "FStarPUUtils.hpp"
template <class OctreeClass, class CellContainerClass, class CellClass, class KernelClass, class ParticleGroupClass, class ParticleContainerClass>
......@@ -86,6 +94,14 @@ protected:
typedef FStarPUCpuWrapper<CellContainerClass, CellClass, KernelClass, ParticleGroupClass, ParticleContainerClass> StarPUCpuWrapperClass;
StarPUCpuWrapperClass cpuWrapper;
#endif
#ifdef STARPU_USE_CUDA
typedef FStarPUCudaWrapper<CellContainerClass, CellClass, KernelClass, ParticleGroupClass, ParticleContainerClass> StarPUCudaWrapperClass;
StarPUCudaWrapperClass cudaWrapper;
#endif
#ifdef STARPU_USE_OPENCL
typedef FStarPUOpenClWrapper<CellContainerClass, CellClass, KernelClass, ParticleGroupClass, ParticleContainerClass> StarPUOpenClWrapperClass;
StarPUOpenClWrapperClass openclWrapper;
#endif
FStarPUPtrInterface wrappers;
FStarPUPtrInterface* wrapperptr;
......@@ -96,6 +112,12 @@ public:
handles_up(nullptr), handles_down(nullptr),
#ifdef STARPU_USE_CPU
cpuWrapper(tree->getHeight()),
#endif
#ifdef STARPU_USE_CUDA
cudaWrapper(tree->getHeight()),
#endif
#ifdef STARPU_USE_OPENCL
openclWrapper(tree->getHeight()),
#endif
wrapperptr(&wrappers){
FAssertLF(tree, "tree cannot be null");
......@@ -117,6 +139,22 @@ public:
starpu_pthread_mutex_unlock(&initMutex);
});
wrappers.set(FSTARPU_CPU_IDX, &cpuWrapper);
#endif
#ifdef STARPU_USE_CUDA
FStarPUUtils::ExecOnWorkers(STARPU_CUDA, [&](){
starpu_pthread_mutex_lock(&initMutex);
cudaWrapper.initKernel(starpu_worker_get_id(), inKernels);
starpu_pthread_mutex_unlock(&initMutex);
});
wrappers.set(FSTARPU_CUDA_IDX, &cudaWrapper);
#endif
#ifdef STARPU_USE_OPENCL
FStarPUUtils::ExecOnWorkers(STARPU_OPENCL, [&](){
starpu_pthread_mutex_lock(&initMutex);
openclWrapper.initKernel(starpu_worker_get_id(), inKernels);
starpu_pthread_mutex_unlock(&initMutex);
});
wrappers.set(FSTARPU_OPENCL_IDX, &openclWrapper);
#endif
starpu_pthread_mutex_destroy(&initMutex);
......@@ -188,6 +226,18 @@ protected:
p2m_cl.cpu_funcs[0] = StarPUCpuWrapperClass::bottomPassCallback;
p2m_cl.where |= STARPU_CPU;
}
#endif
#ifdef STARPU_USE_CUDA
if(originalCpuKernel->supportP2M()){
p2m_cl.cuda_funcs[0] = StarPUCudaWrapperClass::bottomPassCallback;
p2m_cl.where |= STARPU_CUDA;
}
#endif
#ifdef STARPU_USE_OPENCL
if(originalCpuKernel->supportP2M()){
p2m_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::bottomPassCallback;
p2m_cl.where |= STARPU_OPENCL;
}
#endif
p2m_cl.nbuffers = 2;
p2m_cl.modes[0] = STARPU_RW;
......@@ -202,6 +252,18 @@ protected:
m2m_cl[idx].cpu_funcs[0] = StarPUCpuWrapperClass::upwardPassCallback;
m2m_cl[idx].where |= STARPU_CPU;
}
#endif
#ifdef STARPU_USE_CUDA
if(originalCpuKernel->supportM2M()){
m2m_cl[idx].cuda_funcs[0] = StarPUCudaWrapperClass::upwardPassCallback;
m2m_cl[idx].where |= STARPU_CUDA;
}
#endif
#ifdef STARPU_USE_OPENCL
if(originalCpuKernel->supportM2M()){
m2m_cl[idx].opencl_funcs[0] = StarPUOpenClWrapperClass::upwardPassCallback;
m2m_cl[idx].where |= STARPU_OPENCL;
}
#endif
m2m_cl[idx].nbuffers = idx+2;
m2m_cl[idx].dyn_modes = (starpu_data_access_mode*)malloc((idx+2)*sizeof(starpu_data_access_mode));
......@@ -213,6 +275,18 @@ protected:
l2l_cl[idx].cpu_funcs[0] = StarPUCpuWrapperClass::downardPassCallback;
l2l_cl[idx].where |= STARPU_CPU;
}
#endif
#ifdef STARPU_USE_CUDA
if(originalCpuKernel->supportL2L()){
l2l_cl[idx].cuda_funcs[0] = StarPUCudaWrapperClass::downardPassCallback;
l2l_cl[idx].where |= STARPU_CUDA;
}
#endif
#ifdef STARPU_USE_OPENCL
if(originalCpuKernel->supportL2L()){
l2l_cl[idx].opencl_funcs[0] = StarPUOpenClWrapperClass::downardPassCallback;
l2l_cl[idx].where |= STARPU_OPENCL;
}
#endif
l2l_cl[idx].nbuffers = idx+2;
l2l_cl[idx].dyn_modes = (starpu_data_access_mode*)malloc((idx+2)*sizeof(starpu_data_access_mode));
......@@ -231,6 +305,18 @@ protected:
l2p_cl.cpu_funcs[0] = StarPUCpuWrapperClass::mergePassCallback;
l2p_cl.where |= STARPU_CPU;
}
#endif
#ifdef STARPU_USE_CUDA
if(originalCpuKernel->supportL2P()){
l2p_cl.cuda_funcs[0] = StarPUCudaWrapperClass::mergePassCallback;
l2p_cl.where |= STARPU_CUDA;
}
#endif
#ifdef STARPU_USE_OPENCL
if(originalCpuKernel->supportL2P()){
l2p_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::mergePassCallback;
l2p_cl.where |= STARPU_OPENCL;
}
#endif
l2p_cl.nbuffers = 2;
l2p_cl.modes[0] = STARPU_R;
......@@ -243,6 +329,18 @@ protected:
p2p_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::directInPassCallback;
p2p_cl_in.where |= STARPU_CPU;
}
#endif
#ifdef STARPU_USE_CUDA
if(originalCpuKernel->supportP2P()){
p2p_cl_in.cuda_funcs[0] = StarPUCudaWrapperClass::directInPassCallback;
p2p_cl_in.where |= STARPU_CUDA;
}
#endif
#ifdef STARPU_USE_OPENCL
if(originalCpuKernel->supportP2P()){
p2p_cl_in.opencl_funcs[0] = StarPUOpenClWrapperClass::directInPassCallback;
p2p_cl_in.where |= STARPU_OPENCL;
}
#endif
p2p_cl_in.nbuffers = 1;
p2p_cl_in.modes[0] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
......@@ -253,6 +351,18 @@ protected:
p2p_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::directInoutPassCallback;
p2p_cl_inout.where |= STARPU_CPU;
}
#endif
#ifdef STARPU_USE_CUDA
if(originalCpuKernel->supportP2P()){
p2p_cl_inout.cuda_funcs[0] = StarPUCudaWrapperClass::directInoutPassCallback;
p2p_cl_inout.where |= STARPU_CUDA;
}
#endif
#ifdef STARPU_USE_OPENCL
if(originalCpuKernel->supportP2P()){
p2p_cl_inout.opencl_funcs[0] = StarPUOpenClWrapperClass::directInoutPassCallback;
p2p_cl_inout.where |= STARPU_OPENCL;
}
#endif
p2p_cl_inout.nbuffers = 2;
p2p_cl_inout.modes[0] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
......@@ -265,6 +375,18 @@ protected:
m2l_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::transferInPassCallback;
m2l_cl_in.where |= STARPU_CPU;
}
#endif
#ifdef STARPU_USE_CUDA
if(originalCpuKernel->supportM2L()){
m2l_cl_in.cuda_funcs[0] = StarPUCudaWrapperClass::transferInPassCallback;
m2l_cl_in.where |= STARPU_CUDA;
}
#endif
#ifdef STARPU_USE_OPENCL
if(originalCpuKernel->supportM2L()){
m2l_cl_in.opencl_funcs[0] = StarPUOpenClWrapperClass::transferInPassCallback;
m2l_cl_in.where |= STARPU_OPENCL;
}
#endif
m2l_cl_in.nbuffers = 2;
m2l_cl_in.modes[0] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
......@@ -276,6 +398,18 @@ protected:
m2l_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::transferInoutPassCallback;
m2l_cl_inout.where |= STARPU_CPU;
}
#endif
#ifdef STARPU_USE_CUDA
if(originalCpuKernel->supportM2L()){
m2l_cl_inout.cuda_funcs[0] = StarPUCudaWrapperClass::transferInoutPassCallback;
m2l_cl_inout.where |= STARPU_CUDA;
}
#endif
#ifdef STARPU_USE_OPENCL
if(originalCpuKernel->supportM2L()){
m2l_cl_inout.opencl_funcs[0] = StarPUOpenClWrapperClass::transferInoutPassCallback;
m2l_cl_inout.where |= STARPU_OPENCL;
}
#endif
m2l_cl_inout.nbuffers = 4;
m2l_cl_inout.modes[0] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
......@@ -319,6 +453,18 @@ protected:
p2p_cl_inout_mpi.where |= STARPU_CPU;
p2p_cl_inout_mpi.cpu_funcs[0] = StarPUCpuWrapperClass::directInoutPassCallbackMpi;
}
#endif
#ifdef STARPU_USE_CUDA
if(originalCpuKernel->supportM2L()){
p2p_cl_inout_mpi.where |= STARPU_CUDA;
p2p_cl_inout_mpi.cuda_funcs[0] = StarPUCudaWrapperClass::directInoutPassCallbackMpi;
}
#endif
#ifdef STARPU_USE_OPENCL
if(originalCpuKernel->supportM2L()){
p2p_cl_inout_mpi.where |= STARPU_OPENCL;
p2p_cl_inout_mpi.opencl_funcs[0] = StarPUOpenClWrapperClass::directInoutPassCallbackMpi;
}
#endif
p2p_cl_inout_mpi.nbuffers = 2;
p2p_cl_inout_mpi.modes[0] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
......@@ -331,6 +477,18 @@ protected:
m2l_cl_inout_mpi.where |= STARPU_CPU;
m2l_cl_inout_mpi.cpu_funcs[0] = StarPUCpuWrapperClass::transferInoutPassCallbackMpi;
}
#endif
#ifdef STARPU_USE_CUDA
if(originalCpuKernel->supportM2L()){
m2l_cl_inout_mpi.where |= STARPU_CUDA;
m2l_cl_inout_mpi.cuda_funcs[0] = StarPUCudaWrapperClass::transferInoutPassCallbackMpi;
}
#endif
#ifdef STARPU_USE_OPENCL
if(originalCpuKernel->supportM2L()){
m2l_cl_inout_mpi.where |= STARPU_OPENCL;
m2l_cl_inout_mpi.opencl_funcs[0] = StarPUOpenClWrapperClass::transferInoutPassCallbackMpi;
}
#endif
m2l_cl_inout_mpi.nbuffers = 2;
m2l_cl_inout_mpi.modes[0] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE);
......
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment