Commit 090158ca authored by BRAMAS Berenger's avatar BRAMAS Berenger
Browse files

Make the opencl starpu compile

parent dd6edd46
......@@ -93,7 +93,7 @@ protected:
typedef FStarPUCudaWrapper<KernelClass, CudaCellContainerClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass> StarPUCudaWrapperClass;
StarPUCudaWrapperClass cudaWrapper;
#endif
#ifdef ScalFMM_ENABLE_CUDA_KERNEL
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
typedef FStarPUOpenClWrapper<KernelClass, OpenCLDeviceWrapperClass> StarPUOpenClWrapperClass;
StarPUOpenClWrapperClass openclWrapper;
#endif
......@@ -111,7 +111,7 @@ public:
#ifdef ScalFMM_ENABLE_CUDA_KERNEL
cudaWrapper(tree->getHeight()),
#endif
#ifdef ScalFMM_ENABLE_CUDA_KERNEL
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
openclWrapper(tree->getHeight()),
#endif
wrapperptr(&wrappers){
......@@ -142,7 +142,7 @@ public:
});
wrappers.set(FSTARPU_CUDA_IDX, &cudaWrapper);
#endif
#ifdef ScalFMM_ENABLE_CUDA_KERNEL
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
FStarPUUtils::ExecOnWorkers(STARPU_OPENCL, [&](){
starpu_pthread_mutex_lock(&initMutex);
openclWrapper.initKernel(starpu_worker_get_id(), inKernels);
......@@ -215,7 +215,7 @@ protected:
p2m_cl.where |= STARPU_CUDA;
}
#endif
#ifdef ScalFMM_ENABLE_CUDA_KERNEL
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
if(originalCpuKernel->supportP2M(FSTARPU_OPENCL_IDX)){
p2m_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::bottomPassCallback;
p2m_cl.where |= STARPU_OPENCL;
......@@ -241,7 +241,7 @@ protected:
m2m_cl[idx].where |= STARPU_CUDA;
}
#endif
#ifdef ScalFMM_ENABLE_CUDA_KERNEL
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
if(originalCpuKernel->supportM2M(FSTARPU_OPENCL_IDX)){
m2m_cl[idx].opencl_funcs[0] = StarPUOpenClWrapperClass::upwardPassCallback;
m2m_cl[idx].where |= STARPU_OPENCL;
......@@ -264,7 +264,7 @@ protected:
l2l_cl[idx].where |= STARPU_CUDA;
}
#endif
#ifdef ScalFMM_ENABLE_CUDA_KERNEL
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
if(originalCpuKernel->supportL2L(FSTARPU_OPENCL_IDX)){
l2l_cl[idx].opencl_funcs[0] = StarPUOpenClWrapperClass::downardPassCallback;
l2l_cl[idx].where |= STARPU_OPENCL;
......@@ -294,7 +294,7 @@ protected:
l2p_cl.where |= STARPU_CUDA;
}
#endif
#ifdef ScalFMM_ENABLE_CUDA_KERNEL
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
if(originalCpuKernel->supportL2P(FSTARPU_OPENCL_IDX)){
l2p_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::mergePassCallback;
l2p_cl.where |= STARPU_OPENCL;
......@@ -318,7 +318,7 @@ protected:
p2p_cl_in.where |= STARPU_CUDA;
}
#endif
#ifdef ScalFMM_ENABLE_CUDA_KERNEL
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
if(originalCpuKernel->supportP2P(FSTARPU_OPENCL_IDX)){
p2p_cl_in.opencl_funcs[0] = StarPUOpenClWrapperClass::directInPassCallback;
p2p_cl_in.where |= STARPU_OPENCL;
......@@ -340,7 +340,7 @@ protected:
p2p_cl_inout.where |= STARPU_CUDA;
}
#endif
#ifdef ScalFMM_ENABLE_CUDA_KERNEL
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
if(originalCpuKernel->supportP2P(FSTARPU_OPENCL_IDX)){
p2p_cl_inout.opencl_funcs[0] = StarPUOpenClWrapperClass::directInoutPassCallback;
p2p_cl_inout.where |= STARPU_OPENCL;
......@@ -364,7 +364,7 @@ protected:
m2l_cl_in.where |= STARPU_CUDA;
}
#endif
#ifdef ScalFMM_ENABLE_CUDA_KERNEL
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
if(originalCpuKernel->supportM2L(FSTARPU_OPENCL_IDX)){
m2l_cl_in.opencl_funcs[0] = StarPUOpenClWrapperClass::transferInPassCallback;
m2l_cl_in.where |= STARPU_OPENCL;
......@@ -387,7 +387,7 @@ protected:
m2l_cl_inout.where |= STARPU_CUDA;
}
#endif
#ifdef ScalFMM_ENABLE_CUDA_KERNEL
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
if(originalCpuKernel->supportM2L(FSTARPU_OPENCL_IDX)){
m2l_cl_inout.opencl_funcs[0] = StarPUOpenClWrapperClass::transferInoutPassCallback;
m2l_cl_inout.where |= STARPU_OPENCL;
......
......@@ -22,10 +22,9 @@
#include <omp.h>
//extern "C"{
#include <starpu.h>
#include <starpu_mpi.h>
//}
#include "FStarPUUtils.hpp"
#ifdef STARPU_USE_CPU
#include "FStarPUCpuWrapper.hpp"
......@@ -37,15 +36,19 @@
#include "Cuda/FCudaGroupOfParticles.hpp"
#include "Cuda/FCudaGroupOfCells.hpp"
#endif
#ifdef STARPU_USE_OPENCL
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
#include "FStarPUOpenClWrapper.hpp"
#include "OpenCl/FOpenCLDeviceWrapper.hpp"
#endif
#include "FStarPUUtils.hpp"
template <class OctreeClass, class CellContainerClass, class CellClass, class KernelClass, class ParticleGroupClass, class ParticleContainerClass
#ifdef ScalFMM_ENABLE_CUDA_KERNEL
, class CudaCellContainerClass = FCudaGroupOfCells<0>, class CudaParticleGroupClass = FCudaGroupOfParticles<0, int>, class CudaParticleContainerClass = FCudaGroupAttachedLeaf<0, int>,
class CudaKernelClass = FCudaEmptyKernel<>
#endif
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
, class OpenCLDeviceWrapperClass = FOpenCLDeviceWrapper<KernelClass, nullptr>
#endif
>
class FGroupTaskStarPUMpiAlgorithm {
......@@ -53,6 +56,9 @@ protected:
typedef FGroupTaskStarPUMpiAlgorithm<OctreeClass, CellContainerClass, CellClass, KernelClass, ParticleGroupClass, ParticleContainerClass
#ifdef ScalFMM_ENABLE_CUDA_KERNEL
, CudaCellContainerClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass
#endif
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
, OpenCLDeviceWrapperClass
#endif
> ThisClass;
......@@ -103,8 +109,8 @@ protected:
typedef FStarPUCudaWrapper<KernelClass, CudaCellContainerClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass> StarPUCudaWrapperClass;
StarPUCudaWrapperClass cudaWrapper;
#endif
#ifdef STARPU_USE_OPENCL
typedef FStarPUOpenClWrapper<CellContainerClass, CellClass, KernelClass, ParticleGroupClass, ParticleContainerClass> StarPUOpenClWrapperClass;
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
typedef FStarPUOpenClWrapper<KernelClass, OpenCLDeviceWrapperClass> StarPUOpenClWrapperClass;
StarPUOpenClWrapperClass openclWrapper;
#endif
......@@ -121,7 +127,7 @@ public:
#ifdef ScalFMM_ENABLE_CUDA_KERNEL
cudaWrapper(tree->getHeight()),
#endif
#ifdef STARPU_USE_OPENCL
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
openclWrapper(tree->getHeight()),
#endif
wrapperptr(&wrappers){
......@@ -153,7 +159,7 @@ public:
});
wrappers.set(FSTARPU_CUDA_IDX, &cudaWrapper);
#endif
#ifdef STARPU_USE_OPENCL
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
FStarPUUtils::ExecOnWorkers(STARPU_OPENCL, [&](){
starpu_pthread_mutex_lock(&initMutex);
openclWrapper.initKernel(starpu_worker_get_id(), inKernels);
......@@ -238,7 +244,7 @@ protected:
p2m_cl.where |= STARPU_CUDA;
}
#endif
#ifdef STARPU_USE_OPENCL
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
if(originalCpuKernel->supportP2M(FSTARPU_OPENCL_IDX)){
p2m_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::bottomPassCallback;
p2m_cl.where |= STARPU_OPENCL;
......@@ -264,7 +270,7 @@ protected:
m2m_cl[idx].where |= STARPU_CUDA;
}
#endif
#ifdef STARPU_USE_OPENCL
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
if(originalCpuKernel->supportM2M(FSTARPU_OPENCL_IDX)){
m2m_cl[idx].opencl_funcs[0] = StarPUOpenClWrapperClass::upwardPassCallback;
m2m_cl[idx].where |= STARPU_OPENCL;
......@@ -287,7 +293,7 @@ protected:
l2l_cl[idx].where |= STARPU_CUDA;
}
#endif
#ifdef STARPU_USE_OPENCL
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
if(originalCpuKernel->supportL2L(FSTARPU_OPENCL_IDX)){
l2l_cl[idx].opencl_funcs[0] = StarPUOpenClWrapperClass::downardPassCallback;
l2l_cl[idx].where |= STARPU_OPENCL;
......@@ -317,7 +323,7 @@ protected:
l2p_cl.where |= STARPU_CUDA;
}
#endif
#ifdef STARPU_USE_OPENCL
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
if(originalCpuKernel->supportL2P(FSTARPU_OPENCL_IDX)){
l2p_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::mergePassCallback;
l2p_cl.where |= STARPU_OPENCL;
......@@ -341,7 +347,7 @@ protected:
p2p_cl_in.where |= STARPU_CUDA;
}
#endif
#ifdef STARPU_USE_OPENCL
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
if(originalCpuKernel->supportP2P(FSTARPU_OPENCL_IDX)){
p2p_cl_in.opencl_funcs[0] = StarPUOpenClWrapperClass::directInPassCallback;
p2p_cl_in.where |= STARPU_OPENCL;
......@@ -363,7 +369,7 @@ protected:
p2p_cl_inout.where |= STARPU_CUDA;
}
#endif
#ifdef STARPU_USE_OPENCL
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
if(originalCpuKernel->supportP2P(FSTARPU_OPENCL_IDX)){
p2p_cl_inout.opencl_funcs[0] = StarPUOpenClWrapperClass::directInoutPassCallback;
p2p_cl_inout.where |= STARPU_OPENCL;
......@@ -387,7 +393,7 @@ protected:
m2l_cl_in.where |= STARPU_CUDA;
}
#endif
#ifdef STARPU_USE_OPENCL
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
if(originalCpuKernel->supportM2L(FSTARPU_OPENCL_IDX)){
m2l_cl_in.opencl_funcs[0] = StarPUOpenClWrapperClass::transferInPassCallback;
m2l_cl_in.where |= STARPU_OPENCL;
......@@ -410,7 +416,7 @@ protected:
m2l_cl_inout.where |= STARPU_CUDA;
}
#endif
#ifdef STARPU_USE_OPENCL
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
if(originalCpuKernel->supportM2L(FSTARPU_OPENCL_IDX)){
m2l_cl_inout.opencl_funcs[0] = StarPUOpenClWrapperClass::transferInoutPassCallback;
m2l_cl_inout.where |= STARPU_OPENCL;
......@@ -465,7 +471,7 @@ protected:
p2p_cl_inout_mpi.cuda_funcs[0] = StarPUCudaWrapperClass::directInoutPassCallbackMpi;
}
#endif
#ifdef STARPU_USE_OPENCL
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
if(originalCpuKernel->supportM2L(FSTARPU_OPENCL_IDX)){
p2p_cl_inout_mpi.where |= STARPU_OPENCL;
p2p_cl_inout_mpi.opencl_funcs[0] = StarPUOpenClWrapperClass::directInoutPassCallbackMpi;
......@@ -489,7 +495,7 @@ protected:
m2l_cl_inout_mpi.cuda_funcs[0] = StarPUCudaWrapperClass::transferInoutPassCallbackMpi;
}
#endif
#ifdef STARPU_USE_OPENCL
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
if(originalCpuKernel->supportM2L(FSTARPU_OPENCL_IDX)){
m2l_cl_inout_mpi.where |= STARPU_OPENCL;
m2l_cl_inout_mpi.opencl_funcs[0] = StarPUOpenClWrapperClass::transferInoutPassCallbackMpi;
......
......@@ -89,7 +89,7 @@ public:
int idxLevel = 0;
starpu_codelet_unpack_args(cl_arg, &worker, &nbSubCellGroups, &idxLevel);
cl_mem* subCellGroupsPtr[9];
cl_mem subCellGroupsPtr[9];
memset(subCellGroupsPtr, 0, 9*sizeof(cl_mem));
size_t subCellGroupsSize[9];
memset(subCellGroupsSize, 0, 9*sizeof(size_t));
......@@ -123,7 +123,7 @@ public:
cl_mem outsideInteractionsCl = clCreateBuffer(kernel->getOpenCLContext(),
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR | CL_MEM_HOST_NO_ACCESS,
outsideInteractions->size()*sizeof(OutOfBlockInteraction),
outsideInteractions->data(), &errcode_ret);
(void*)outsideInteractions->data(), &errcode_ret);
FAssertLF(outsideInteractionsCl && errcode_ret == CL_SUCCESS);
kernel->transferInoutPassPerformMpi(currentCellsPtr,
......@@ -171,7 +171,7 @@ public:
cl_mem outsideInteractionsCl = clCreateBuffer(kernel->getOpenCLContext(),
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR | CL_MEM_HOST_NO_ACCESS,
outsideInteractions->size()*sizeof(OutOfBlockInteraction),
outsideInteractions->data(), &errcode_ret);
(void*)outsideInteractions->data(), &errcode_ret);
FAssertLF(outsideInteractionsCl && errcode_ret == CL_SUCCESS);
kernel->transferInoutPassPerform(currentCellsPtr,
......@@ -194,7 +194,7 @@ public:
int idxLevel = 0;
starpu_codelet_unpack_args(cl_arg, &worker, &nbSubCellGroups, &idxLevel);
cl_mem* subCellGroupsPtr[9];
cl_mem subCellGroupsPtr[9];
memset(subCellGroupsPtr, 0, 9*sizeof(cl_mem));
size_t subCellGroupsSize[9];
memset(subCellGroupsSize, 0, 9*sizeof(size_t));
......@@ -228,7 +228,7 @@ public:
cl_mem outsideInteractionsCl = clCreateBuffer(kernel->getOpenCLContext(),
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR | CL_MEM_HOST_NO_ACCESS,
outsideInteractions->size()*sizeof(OutOfBlockInteraction),
outsideInteractions->data(), &errcode_ret);
(void*)outsideInteractions->data(), &errcode_ret);
FAssertLF(outsideInteractionsCl && errcode_ret == CL_SUCCESS);
kernel->directInoutPassPerformMpi(containersPtr,
......@@ -267,7 +267,7 @@ public:
cl_mem outsideInteractionsCl = clCreateBuffer(kernel->getOpenCLContext(),
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR | CL_MEM_HOST_NO_ACCESS,
outsideInteractions->size()*sizeof(OutOfBlockInteraction),
outsideInteractions->data(), &errcode_ret);
(void*)outsideInteractions->data(), &errcode_ret);
FAssertLF(outsideInteractionsCl && errcode_ret == CL_SUCCESS);
kernel->directInoutPassPerform(containersPtr,
......
......@@ -22,7 +22,7 @@ protected:
static void SetKernelArgs(cl_kernel& kernel, const int pos){
}
template <class ParamClass, class... Args>
static void SetKernelArgs(cl_kernel* kernel, const int pos, ParamClass* param, Args... args){
static void SetKernelArgs(cl_kernel& kernel, const int pos, ParamClass* param, Args... args){
FAssertLF(clSetKernelArg(kernel, pos, sizeof(*param), param) == 0);
SetKernelArgs(kernel, pos+1, args...);
}
......@@ -32,6 +32,8 @@ protected:
struct starpu_opencl_program opencl_code;
cl_context context;
cl_kernel kernel_bottomPassPerform;
cl_command_queue queue_bottomPassPerform;
......@@ -68,6 +70,8 @@ public:
workerDevid = starpu_worker_get_devid(workerId);
if(KernelFilename){
starpu_opencl_get_context (workerDevid, &context);
const int err = starpu_opencl_load_opencl_from_file(KernelFilename, &opencl_code, NULL);
if(err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
......@@ -90,7 +94,7 @@ public:
virtual void releaseKernel(){
}
~FOpenCLDeviceWrapper(){
virtual ~FOpenCLDeviceWrapper(){
// Release
releaseKernel();
if(KernelFilename){
......@@ -99,86 +103,90 @@ public:
}
}
void bottomPassPerform(unsigned char* leafCellsPtr, size_t leafCellsSize, unsigned char* containersPtr, size_t containersSize){
SetKernelArgs(&kernel_bottomPassPerform, 0, &leafCellsPtr, &leafCellsSize, &containersPtr, &containersSize);
cl_context& getOpenCLContext(){
return context;
}
void bottomPassPerform(cl_mem leafCellsPtr, size_t leafCellsSize, cl_mem containersPtr, size_t containersSize){
SetKernelArgs(kernel_bottomPassPerform, 0, &leafCellsPtr, &leafCellsSize, &containersPtr, &containersSize);
size_t dim = 1;
const int err = clEnqueueNDRangeKernel(queue_bottomPassPerform, kernel_bottomPassPerform, 1, NULL, &dim, NULL, 0, NULL, NULL);
if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
}
void upwardPassPerform(unsigned char* currentCellsPtr, size_t currentCellsSize, unsigned char* subCellGroupsPtr, size_t subCellGroupsSize, int nbSubCellGroups, int idxLevel){
SetKernelArgs(&kernel_upwardPassPerform, 0, &currentCellsPtr, &currentCellsSize, &subCellGroupsPtr, &subCellGroupsSize, &nbSubCellGroups, &idxLevel);
void upwardPassPerform(cl_mem currentCellsPtr, size_t currentCellsSize, cl_mem subCellGroupsPtr[9], size_t subCellGroupsSize[9], int nbSubCellGroups, int idxLevel){
SetKernelArgs(kernel_upwardPassPerform, 0, &currentCellsPtr, &currentCellsSize, &subCellGroupsPtr, &subCellGroupsSize, &nbSubCellGroups, &idxLevel);
size_t dim = 1;
const int err = clEnqueueNDRangeKernel(queue_upwardPassPerform, kernel_upwardPassPerform, 1, NULL, &dim, NULL, 0, NULL, NULL);
if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
}
void transferInoutPassPerformMpi(unsigned char* currentCellsPtr,
size_t currentCellsSize, unsigned char* externalCellsPtr, size_t externalCellsSize, int idxLevel, cl_mem outsideInteractionsCl,
void transferInoutPassPerformMpi(cl_mem currentCellsPtr,
size_t currentCellsSize, cl_mem externalCellsPtr, size_t externalCellsSize, int idxLevel, cl_mem outsideInteractionsCl,
size_t outsideInteractionsSize){
SetKernelArgs(&kernel_transferInoutPassPerformMpi, 0, &currentCellsPtr,&currentCellsSize, &externalCellsPtr, &externalCellsSize, &idxLevel, &outsideInteractionsCl,
SetKernelArgs(kernel_transferInoutPassPerformMpi, 0, &currentCellsPtr,&currentCellsSize, &externalCellsPtr, &externalCellsSize, &idxLevel, &outsideInteractionsCl,
&outsideInteractionsSize);
size_t dim = 1;
const int err = clEnqueueNDRangeKernel(queue_transferInoutPassPerformMpi, kernel_transferInoutPassPerformMpi, 1, NULL, &dim, NULL, 0, NULL, NULL);
if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
}
void transferInPassPerform(unsigned char* currentCellsPtr, size_t currentCellsSize, int idxLevel){
SetKernelArgs(&kernel_transferInPassPerform, 0, &currentCellsPtr, &currentCellsSize, &idxLevel);
void transferInPassPerform(cl_mem currentCellsPtr, size_t currentCellsSize, int idxLevel){
SetKernelArgs(kernel_transferInPassPerform, 0, &currentCellsPtr, &currentCellsSize, &idxLevel);
size_t dim = 1;
const int err = clEnqueueNDRangeKernel(queue_transferInPassPerform, kernel_transferInPassPerform, 1, NULL, &dim, NULL, 0, NULL, NULL);
if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
}
void transferInoutPassPerform(unsigned char* currentCellsPtr,
size_t currentCellsSize, unsigned char* externalCellsPtr, size_t externalCellsSize, int idxLevel, cl_mem outsideInteractionsCl,
void transferInoutPassPerform(cl_mem currentCellsPtr,
size_t currentCellsSize, cl_mem externalCellsPtr, size_t externalCellsSize, int idxLevel, cl_mem outsideInteractionsCl,
size_t outsideInteractionsSize){
SetKernelArgs(&kernel_transferInoutPassPerform, 0, &currentCellsPtr,&currentCellsSize, &externalCellsPtr, &externalCellsSize, &idxLevel, &outsideInteractionsCl,&outsideInteractionsSize);
SetKernelArgs(kernel_transferInoutPassPerform, 0, &currentCellsPtr,&currentCellsSize, &externalCellsPtr, &externalCellsSize, &idxLevel, &outsideInteractionsCl,&outsideInteractionsSize);
size_t dim = 1;
const int err = clEnqueueNDRangeKernel(queue_transferInoutPassPerform, kernel_transferInoutPassPerform, 1, NULL, &dim, NULL, 0, NULL, NULL);
if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
}
void downardPassPerform(unsigned char* currentCellsPtr,
size_t currentCellsSize, unsigned char* subCellGroupsPtr, size_t subCellGroupsSize, int nbSubCellGroups, int idxLevel){
SetKernelArgs(&kernel_downardPassPerform, 0, &currentCellsPtr,
void downardPassPerform(cl_mem currentCellsPtr,
size_t currentCellsSize, cl_mem subCellGroupsPtr[9], size_t subCellGroupsSize[9], int nbSubCellGroups, int idxLevel){
SetKernelArgs(kernel_downardPassPerform, 0, &currentCellsPtr,
&currentCellsSize, &subCellGroupsPtr, &subCellGroupsSize, &nbSubCellGroups, &idxLevel);
size_t dim = 1;
const int err = clEnqueueNDRangeKernel(queue_downardPassPerform, kernel_downardPassPerform, 1, NULL, &dim, NULL, 0, NULL, NULL);
if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
}
void directInoutPassPerformMpi(unsigned char* containersPtr,
size_t containersSize, unsigned char* externalContainersPtr, size_t externalContainersSize, cl_mem outsideInteractionsCl,
void directInoutPassPerformMpi(cl_mem containersPtr,
size_t containersSize, cl_mem externalContainersPtr, size_t externalContainersSize, cl_mem outsideInteractionsCl,
size_t outsideInteractionsSize){
SetKernelArgs(&kernel_directInoutPassPerformMpi, 0, &containersPtr,
SetKernelArgs(kernel_directInoutPassPerformMpi, 0, &containersPtr,
&containersSize, &externalContainersPtr, &externalContainersSize, &outsideInteractionsCl,&outsideInteractionsSize);
size_t dim = 1;
const int err = clEnqueueNDRangeKernel(queue_directInoutPassPerformMpi, kernel_directInoutPassPerformMpi, 1, NULL, &dim, NULL, 0, NULL, NULL);
if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
}
void directInPassPerform(unsigned char* containersPtr, size_t containerSize){
SetKernelArgs(&kernel_directInPassPerform, 0, &containersPtr, &containerSize);
void directInPassPerform(cl_mem containersPtr, size_t containerSize){
SetKernelArgs(kernel_directInPassPerform, 0, &containersPtr, &containerSize);
size_t dim = 1;
const int err = clEnqueueNDRangeKernel(queue_directInPassPerform, kernel_directInPassPerform, 1, NULL, &dim, NULL, 0, NULL, NULL);
if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
}
void directInoutPassPerform(unsigned char* containersPtr,
size_t containerSize, unsigned char* externalContainersPtr, size_t externalContainersSize, cl_mem outsideInteractionsCl,
void directInoutPassPerform(cl_mem containersPtr,
size_t containerSize, cl_mem externalContainersPtr, size_t externalContainersSize, cl_mem outsideInteractionsCl,
size_t outsideInteractionsSize){
SetKernelArgs(&kernel_directInoutPassPerform, 0, &containersPtr,
SetKernelArgs(kernel_directInoutPassPerform, 0, &containersPtr,
&containerSize, &externalContainersPtr, &externalContainersSize, &outsideInteractionsCl, &outsideInteractionsSize);
size_t dim = 1;
const int err = clEnqueueNDRangeKernel(queue_directInoutPassPerform, kernel_directInoutPassPerform, 1, NULL, &dim, NULL, 0, NULL, NULL);
if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
}
void mergePassPerform(unsigned char* leafCellsPtr,
size_t leafCellsSize, unsigned char* containersPtr, size_t containersSize){
SetKernelArgs(&kernel_mergePassPerform, 0, &leafCellsPtr, &leafCellsSize, &containersPtr, &containersSize);
void mergePassPerform(cl_mem leafCellsPtr,
size_t leafCellsSize, cl_mem containersPtr, size_t containersSize){
SetKernelArgs(kernel_mergePassPerform, 0, &leafCellsPtr, &leafCellsSize, &containersPtr, &containersSize);
size_t dim = 1;
const int err = clEnqueueNDRangeKernel(queue_mergePassPerform, kernel_mergePassPerform, 1, NULL, &dim, NULL, 0, NULL, NULL);
if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
......
// Keep in private GIT
// @SCALFMM_PRIVATE
// @FUSE_STARPU
// @FUSE_OPENCL
#include "../../Src/Utils/FGlobal.hpp"
#include "../../Src/GroupTree/FGroupTree.hpp"
#include "../../Src/Components/FSimpleLeaf.hpp"
#include "../../Src/Containers/FVector.hpp"
#include "../../Src/Kernels/P2P/FP2PParticleContainer.hpp"
#include "../../Src/Utils/FMath.hpp"
#include "../../Src/Utils/FMemUtils.hpp"
#include "../../Src/Utils/FParameters.hpp"
#include "../../Src/Files/FRandomLoader.hpp"
#include "../../Src/GroupTree/FGroupSeqAlgorithm.hpp"
#include "../../Src/GroupTree/FGroupTaskStarpuAlgorithm.hpp"
#include "../../Src/GroupTree/FP2PGroupParticleContainer.hpp"
#include "../../Src/GroupTree/FGroupTaskAlgorithm.hpp"
#include "../../Src/Utils/FParameterNames.hpp"
#include "../../Src/Components/FTestParticleContainer.hpp"
#include "../../Src/Components/FTestCell.hpp"
#include "../../Src/Components/FTestKernels.hpp"
#include "../../Src/GroupTree/FGroupTestParticleContainer.hpp"
#include "../../Src/Files/FFmaGenericLoader.hpp"
#include "../../Src/Core/FFmmAlgorithm.hpp"
#include "../../Src/GroupTree/FStarPUKernelCapacities.hpp"
int main(int argc, char* argv[]){
const FParameterNames LocalOptionBlocSize {
{"-bs"},
"The size of the block of the blocked tree"
};
FHelpDescribeAndExit(argc, argv, "Test the blocked tree by counting the particles.",
FParameterDefinitions::OctreeHeight, FParameterDefinitions::NbThreads,
FParameterDefinitions::NbParticles, LocalOptionBlocSize);
// Initialize the types
typedef FTestCell GroupCellClass;
typedef FGroupTestParticleContainer GroupContainerClass;
typedef FGroupTree< GroupCellClass, GroupContainerClass, 2, long long int> GroupOctreeClass;
typedef FStarPUAllYesCapacities<FTestKernels< GroupCellClass, GroupContainerClass >> GroupKernelClass;
typedef FGroupTaskStarPUAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass
#ifdef ScalFMM_ENABLE_CUDA_KERNEL
, FCudaGroupOfCells<0>, FCudaGroupOfParticles<0, int>, FCudaGroupAttachedLeaf<0, int>, FCudaEmptyKernel<>
#endif
, FOpenCLDeviceWrapper<GroupKernelClass, nullptr>
> GroupAlgorithm;
typedef FTestCell CellClass;
typedef FTestParticleContainer ContainerClass;
typedef FSimpleLeaf< ContainerClass > LeafClass;
typedef FOctree< CellClass, ContainerClass , LeafClass > OctreeClass;
typedef FTestKernels< CellClass, ContainerClass > KernelClass;
// FFmmAlgorithmTask FFmmAlgorithmThread
typedef FFmmAlgorithm<OctreeClass, CellClass, ContainerClass, KernelClass, LeafClass > FmmClass;
// Get params
const int NbLevels = FParameters::getValue(argc,argv,FParameterDefinitions::OctreeHeight.options, 5);
const int groupSize = FParameters::getValue(argc,argv,LocalOptionBlocSize.options, 250);
//#define LOAD_FILE
#ifndef LOAD_FILE
const int NbParticles = FParameters::getValue(argc,argv,FParameterDefinitions::NbParticles.options, 20);
FRandomLoader loader(NbParticles, 1.0, FPoint(0,0,0), 0);
#else
// Load the particles
const char* const filename = FParameters::getStr(argc,argv,FParameterDefinitions::InputFile.options, "../Data/test20k.fma");
FFmaGenericLoader loader(filename);
#endif
FAssertLF(loader.isOpen());
// Usual octree
OctreeClass tree(NbLevels, 2, loader.getBoxWidth(), loader.getCenterOfBox());
FP2PParticleContainer<> allParticles;
for(int idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
FPoint particlePosition;
#ifndef LOAD_FILE
loader.fillParticle(&particlePosition);
#else
FReal ph;
loader.fillParticle(&particlePosition, &ph);
#endif
allParticles.push(particlePosition);
tree.insert(particlePosition);
}
// Put the data into the tree
//GroupOctreeClass groupedTree(NbLevels, groupSize, &tree);
GroupOctreeClass groupedTree(NbLevels, loader.getBoxWidth(), loader.getCenterOfBox(), groupSize, &allParticles);
groupedTree.printInfoBlocks();
// Check tree structure at leaf level
groupedTree.forEachCellLeaf<FGroupTestParticleContainer>([&](GroupCellClass* gcell, FGroupTestParticleContainer* gleaf){
const ContainerClass* src = tree.getLeafSrc(gcell->getMortonIndex());
if(src == nullptr){
std::cout << "[PartEmpty] Error cell should not exist " << gcell->getMortonIndex() << "\n";
}
else {
if(src->getNbParticles() != gleaf->getNbParticles()){
std::cout << "[Part] Nb particles is different at index " << gcell->getMortonIndex() << " is " << gleaf->getNbParticles() << " should be " << src->getNbParticles() << "\n";
}
}
});
// Run the algorithm
GroupKernelClass groupkernel;
GroupAlgorithm groupalgo(&groupedTree,&groupkernel);
groupalgo.execute();
// Usual algorithm
KernelClass kernels; // FTestKernels FBasicKernels
FmmClass algo(&tree,&kernels); //FFmmAlgorithm FFmmAlgorithmThread
algo.execute();
// Validate the result
groupedTree.forEachCellLeaf<FGroupTestParticleContainer>([&](GroupCellClass* cell, FGroupTestParticleContainer* leaf){
const int nbPartsInLeaf = leaf->getNbParticles();
if(cell->getDataUp() != nbPartsInLeaf){
std::cout << "[P2M] Error a Cell has " << cell->getDataUp() << " (it should be " << nbPartsInLeaf << ")\n";
}
});
groupedTree.forEachCellLeaf<FGroupTestParticleContainer>([&](GroupCellClass* cell, FGroupTestParticleContainer* leaf){
const int nbPartsInLeaf = leaf->getNbParticles();
const long long int* dataDow