Commit f63bbc46 authored by BRAMAS Berenger

Make the wrappers work even if they are called from non-StarPU code

parent a6e173f6
...@@ -41,16 +41,13 @@ protected: ...@@ -41,16 +41,13 @@ protected:
typedef FStarPUCudaWrapper<KernelClass, SymboleCellClass, PoleCellClass, LocalCellClass, typedef FStarPUCudaWrapper<KernelClass, SymboleCellClass, PoleCellClass, LocalCellClass,
CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass> ThisClass; CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass> ThisClass;
/**
 * Plain aggregate bundling a pointer to another block, an integer id for that
 * block, and a list of OutOfBlockInteraction records.
 *
 * @tparam OtherBlockClass type of the block that `otherBlock` points to.
 *
 * NOTE(review): presumably groups the interactions a block has with one
 * neighbouring block -- no use of this struct is visible here, confirm against
 * the callers.
 */
template <class OtherBlockClass>
struct BlockInteractions{
//< Non-null-checked raw pointer to the other block (ownership not visible here)
OtherBlockClass* otherBlock;
//< Integer identifier associated with `otherBlock`
int otherBlockId;
//< The interaction records stored for this pairing
std::vector<OutOfBlockInteraction> interactions;
};
const int treeHeight; const int treeHeight;
CudaKernelClass* kernels[STARPU_MAXCUDADEVS]; //< The kernels CudaKernelClass* kernels[STARPU_MAXCUDADEVS]; //< The kernels
/**
 * Index used to select the calling thread's entry in `kernels`.
 *
 * @return starpu_worker_get_id() clamped to a minimum of 0, so that a negative
 *         id maps to slot 0 instead of producing a negative array index.
 *         NOTE(review): a negative id is presumably what StarPU reports when
 *         the caller is not a StarPU worker thread -- confirm against the
 *         StarPU documentation.
 *
 * Declared static because it reads no instance state. The call sites reach
 * `kernels` through worker->get<ThisClass>(FSTARPU_CUDA_IDX) rather than through
 * `this`, so they may be static codelet callbacks that have no object on which
 * a non-static member could be invoked. Existing calls of the form
 * getWorkerId() or obj.getWorkerId() remain valid.
 */
static int getWorkerId() {
    return FMath::Max(0, starpu_worker_get_id());
}
public: public:
FStarPUCudaWrapper(const int inTreeHeight): treeHeight(inTreeHeight){ FStarPUCudaWrapper(const int inTreeHeight): treeHeight(inTreeHeight){
memset(kernels, 0, sizeof(CudaKernelClass*)*STARPU_MAXCUDADEVS); memset(kernels, 0, sizeof(CudaKernelClass*)*STARPU_MAXCUDADEVS);
...@@ -85,7 +82,7 @@ public: ...@@ -85,7 +82,7 @@ public:
int intervalSize; int intervalSize;
starpu_codelet_unpack_args(cl_arg, &worker, &intervalSize, &intervalSize); starpu_codelet_unpack_args(cl_arg, &worker, &intervalSize, &intervalSize);
CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CUDA_IDX)->kernels[starpu_worker_get_id()]; CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CUDA_IDX)->kernels[getWorkerId()];
FCuda__bottomPassCallback< SymboleCellClass, PoleCellClass, LocalCellClass, FCuda__bottomPassCallback< SymboleCellClass, PoleCellClass, LocalCellClass,
CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>( CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>(
...@@ -109,7 +106,7 @@ public: ...@@ -109,7 +106,7 @@ public:
int intervalSize = 0; int intervalSize = 0;
starpu_codelet_unpack_args(cl_arg, &worker, &nbSubCellGroups, &idxLevel, &intervalSize); starpu_codelet_unpack_args(cl_arg, &worker, &nbSubCellGroups, &idxLevel, &intervalSize);
CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CUDA_IDX)->kernels[starpu_worker_get_id()]; CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CUDA_IDX)->kernels[getWorkerId()];
FCuda__upwardPassCallback< SymboleCellClass, PoleCellClass, LocalCellClass, FCuda__upwardPassCallback< SymboleCellClass, PoleCellClass, LocalCellClass,
CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>( CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>(
...@@ -134,7 +131,7 @@ public: ...@@ -134,7 +131,7 @@ public:
int intervalSize = 0; int intervalSize = 0;
starpu_codelet_unpack_args(cl_arg, &worker, &idxLevel, &outsideInteractions, &intervalSize); starpu_codelet_unpack_args(cl_arg, &worker, &idxLevel, &outsideInteractions, &intervalSize);
CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CUDA_IDX)->kernels[starpu_worker_get_id()]; CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CUDA_IDX)->kernels[getWorkerId()];
FCuda__transferInoutPassCallbackMpi< SymboleCellClass, PoleCellClass, LocalCellClass, FCuda__transferInoutPassCallbackMpi< SymboleCellClass, PoleCellClass, LocalCellClass,
CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>( CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>(
...@@ -159,7 +156,7 @@ public: ...@@ -159,7 +156,7 @@ public:
int intervalSize = 0; int intervalSize = 0;
starpu_codelet_unpack_args(cl_arg, &worker, &idxLevel, &intervalSize); starpu_codelet_unpack_args(cl_arg, &worker, &idxLevel, &intervalSize);
CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CUDA_IDX)->kernels[starpu_worker_get_id()]; CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CUDA_IDX)->kernels[getWorkerId()];
FCuda__transferInPassCallback< SymboleCellClass, PoleCellClass, LocalCellClass, FCuda__transferInPassCallback< SymboleCellClass, PoleCellClass, LocalCellClass,
CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>( CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>(
...@@ -180,7 +177,7 @@ public: ...@@ -180,7 +177,7 @@ public:
starpu_codelet_unpack_args(cl_arg, &worker, &idxLevel, &outsideInteractions, &intervalSize, &mode); starpu_codelet_unpack_args(cl_arg, &worker, &idxLevel, &outsideInteractions, &intervalSize, &mode);
const int nbInteractions = int(outsideInteractions->size()); const int nbInteractions = int(outsideInteractions->size());
CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CUDA_IDX)->kernels[starpu_worker_get_id()]; CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CUDA_IDX)->kernels[getWorkerId()];
// outsideInteractions is sorted following the outIndex // outsideInteractions is sorted following the outIndex
// Compute the cell interval // Compute the cell interval
...@@ -232,7 +229,7 @@ public: ...@@ -232,7 +229,7 @@ public:
int intervalSize = 0; int intervalSize = 0;
starpu_codelet_unpack_args(cl_arg, &worker, &nbSubCellGroups, &idxLevel, &intervalSize); starpu_codelet_unpack_args(cl_arg, &worker, &nbSubCellGroups, &idxLevel, &intervalSize);
CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CUDA_IDX)->kernels[starpu_worker_get_id()]; CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CUDA_IDX)->kernels[getWorkerId()];
FCuda__downardPassCallback< SymboleCellClass, PoleCellClass, LocalCellClass, FCuda__downardPassCallback< SymboleCellClass, PoleCellClass, LocalCellClass,
CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>( CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>(
...@@ -258,7 +255,7 @@ public: ...@@ -258,7 +255,7 @@ public:
starpu_codelet_unpack_args(cl_arg, &worker, &outsideInteractions, &intervalSize); starpu_codelet_unpack_args(cl_arg, &worker, &outsideInteractions, &intervalSize);
const int nbInteractions = int(outsideInteractions->size()); const int nbInteractions = int(outsideInteractions->size());
CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CUDA_IDX)->kernels[starpu_worker_get_id()]; CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CUDA_IDX)->kernels[getWorkerId()];
std::unique_ptr<int[]> safeOuterInteractions(new int[nbInteractions+1]); std::unique_ptr<int[]> safeOuterInteractions(new int[nbInteractions+1]);
const int counterOuterCell = GetClusterOfInteractionsOutside(safeOuterInteractions.get(), outsideInteractions->data(), nbInteractions); const int counterOuterCell = GetClusterOfInteractionsOutside(safeOuterInteractions.get(), outsideInteractions->data(), nbInteractions);
...@@ -284,7 +281,7 @@ public: ...@@ -284,7 +281,7 @@ public:
FStarPUPtrInterface* worker = nullptr; FStarPUPtrInterface* worker = nullptr;
int intervalSize = 0; int intervalSize = 0;
starpu_codelet_unpack_args(cl_arg, &worker, &intervalSize); starpu_codelet_unpack_args(cl_arg, &worker, &intervalSize);
CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CUDA_IDX)->kernels[starpu_worker_get_id()]; CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CUDA_IDX)->kernels[getWorkerId()];
FCuda__directInPassCallback< SymboleCellClass, PoleCellClass, LocalCellClass, FCuda__directInPassCallback< SymboleCellClass, PoleCellClass, LocalCellClass,
CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>( CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>(
...@@ -348,7 +345,7 @@ public: ...@@ -348,7 +345,7 @@ public:
starpu_codelet_unpack_args(cl_arg, &worker, &outsideInteractions, &intervalSize); starpu_codelet_unpack_args(cl_arg, &worker, &outsideInteractions, &intervalSize);
const int nbInteractions = int(outsideInteractions->size()); const int nbInteractions = int(outsideInteractions->size());
CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CUDA_IDX)->kernels[starpu_worker_get_id()]; CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CUDA_IDX)->kernels[getWorkerId()];
// outsideInteractions is sorted following the outIndex // outsideInteractions is sorted following the outIndex
// Compute the cell interval // Compute the cell interval
...@@ -394,7 +391,7 @@ public: ...@@ -394,7 +391,7 @@ public:
int intervalSize; int intervalSize;
starpu_codelet_unpack_args(cl_arg, &worker, &intervalSize); starpu_codelet_unpack_args(cl_arg, &worker, &intervalSize);
CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CUDA_IDX)->kernels[starpu_worker_get_id()]; CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CUDA_IDX)->kernels[getWorkerId()];
FCuda__mergePassCallback< SymboleCellClass, PoleCellClass, LocalCellClass, FCuda__mergePassCallback< SymboleCellClass, PoleCellClass, LocalCellClass,
CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>( CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>(
......
...@@ -45,6 +45,10 @@ protected: ...@@ -45,6 +45,10 @@ protected:
const int treeHeight; const int treeHeight;
OpenCLKernelClass* kernels[STARPU_MAXOPENCLDEVS]; //< The kernels OpenCLKernelClass* kernels[STARPU_MAXOPENCLDEVS]; //< The kernels
/**
 * Index used to select the calling thread's entry in `kernels`.
 *
 * @return starpu_worker_get_id() clamped to a minimum of 0, so that a negative
 *         id maps to slot 0 instead of producing a negative array index.
 *         NOTE(review): a negative id is presumably what StarPU reports when
 *         the caller is not a StarPU worker thread -- confirm against the
 *         StarPU documentation.
 *
 * Declared static because it reads no instance state. The call sites reach
 * `kernels` through worker->get<ThisClass>(FSTARPU_OPENCL_IDX) rather than
 * through `this`, so they may be static codelet callbacks that have no object
 * on which a non-static member could be invoked. Existing calls of the form
 * getWorkerId() or obj.getWorkerId() remain valid.
 */
static int getWorkerId() {
    return FMath::Max(0, starpu_worker_get_id());
}
public: public:
FStarPUOpenClWrapper(const int inTreeHeight): treeHeight(inTreeHeight){ FStarPUOpenClWrapper(const int inTreeHeight): treeHeight(inTreeHeight){
memset(kernels, 0, sizeof(OpenCLKernelClass*)*STARPU_MAXOPENCLDEVS); memset(kernels, 0, sizeof(OpenCLKernelClass*)*STARPU_MAXOPENCLDEVS);
...@@ -87,7 +91,7 @@ public: ...@@ -87,7 +91,7 @@ public:
FStarPUPtrInterface* worker = nullptr; FStarPUPtrInterface* worker = nullptr;
int intervalSize; int intervalSize;
starpu_codelet_unpack_args(cl_arg, &worker, &intervalSize); starpu_codelet_unpack_args(cl_arg, &worker, &intervalSize);
OpenCLKernelClass* kernel = worker->get<ThisClass>(FSTARPU_OPENCL_IDX)->kernels[starpu_worker_get_id()]; OpenCLKernelClass* kernel = worker->get<ThisClass>(FSTARPU_OPENCL_IDX)->kernels[getWorkerId()];
kernel->bottomPassPerform(leafCellsPtr, leafCellsSize, leafCellsUpPtr, containersPtr, containersSize, kernel->bottomPassPerform(leafCellsPtr, leafCellsSize, leafCellsUpPtr, containersPtr, containersSize,
intervalSize); intervalSize);
...@@ -112,7 +116,7 @@ public: ...@@ -112,7 +116,7 @@ public:
size_t otherCellsSize = STARPU_VARIABLE_GET_ELEMSIZE(buffers[2]); size_t otherCellsSize = STARPU_VARIABLE_GET_ELEMSIZE(buffers[2]);
cl_mem otherCellsUpPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[3])); cl_mem otherCellsUpPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[3]));
OpenCLKernelClass* kernel = worker->get<ThisClass>(FSTARPU_OPENCL_IDX)->kernels[starpu_worker_get_id()]; OpenCLKernelClass* kernel = worker->get<ThisClass>(FSTARPU_OPENCL_IDX)->kernels[getWorkerId()];
kernel->upwardPassPerform(currentCellsPtr, currentCellsSize, currentCellsUpPtr, kernel->upwardPassPerform(currentCellsPtr, currentCellsSize, currentCellsUpPtr,
otherCellsPtr, otherCellsSize, otherCellsUpPtr, idxLevel, otherCellsPtr, otherCellsSize, otherCellsUpPtr, idxLevel,
intervalSize); intervalSize);
...@@ -138,7 +142,7 @@ public: ...@@ -138,7 +142,7 @@ public:
int intervalSize; int intervalSize;
starpu_codelet_unpack_args(cl_arg, &worker, &idxLevel, &outsideInteractions, &intervalSize); starpu_codelet_unpack_args(cl_arg, &worker, &idxLevel, &outsideInteractions, &intervalSize);
OpenCLKernelClass* kernel = worker->get<ThisClass>(FSTARPU_OPENCL_IDX)->kernels[starpu_worker_get_id()]; OpenCLKernelClass* kernel = worker->get<ThisClass>(FSTARPU_OPENCL_IDX)->kernels[getWorkerId()];
cl_int errcode_ret; cl_int errcode_ret;
cl_mem outsideInteractionsCl = clCreateBuffer(kernel->getOpenCLContext(), cl_mem outsideInteractionsCl = clCreateBuffer(kernel->getOpenCLContext(),
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR | CL_MEM_HOST_NO_ACCESS, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR | CL_MEM_HOST_NO_ACCESS,
...@@ -171,7 +175,7 @@ public: ...@@ -171,7 +175,7 @@ public:
int intervalSize; int intervalSize;
starpu_codelet_unpack_args(cl_arg, &worker, &idxLevel, &intervalSize); starpu_codelet_unpack_args(cl_arg, &worker, &idxLevel, &intervalSize);
OpenCLKernelClass* kernel = worker->get<ThisClass>(FSTARPU_OPENCL_IDX)->kernels[starpu_worker_get_id()]; OpenCLKernelClass* kernel = worker->get<ThisClass>(FSTARPU_OPENCL_IDX)->kernels[getWorkerId()];
kernel->transferInPassPerform(currentCellsPtr, currentCellsSize, currentCellsUpPtr, currentCellsDownPtr, idxLevel, kernel->transferInPassPerform(currentCellsPtr, currentCellsSize, currentCellsUpPtr, currentCellsDownPtr, idxLevel,
intervalSize); intervalSize);
} }
...@@ -192,7 +196,7 @@ public: ...@@ -192,7 +196,7 @@ public:
int mode = 0; int mode = 0;
starpu_codelet_unpack_args(cl_arg, &worker, &idxLevel, &outsideInteractions, &intervalSize, &mode); starpu_codelet_unpack_args(cl_arg, &worker, &idxLevel, &outsideInteractions, &intervalSize, &mode);
OpenCLKernelClass* kernel = worker->get<ThisClass>(FSTARPU_OPENCL_IDX)->kernels[starpu_worker_get_id()]; OpenCLKernelClass* kernel = worker->get<ThisClass>(FSTARPU_OPENCL_IDX)->kernels[getWorkerId()];
cl_int errcode_ret; cl_int errcode_ret;
cl_mem outsideInteractionsCl = clCreateBuffer(kernel->getOpenCLContext(), cl_mem outsideInteractionsCl = clCreateBuffer(kernel->getOpenCLContext(),
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR | CL_MEM_HOST_NO_ACCESS, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR | CL_MEM_HOST_NO_ACCESS,
...@@ -227,7 +231,7 @@ public: ...@@ -227,7 +231,7 @@ public:
size_t otherCellsSize = STARPU_VARIABLE_GET_ELEMSIZE(buffers[2]); size_t otherCellsSize = STARPU_VARIABLE_GET_ELEMSIZE(buffers[2]);
cl_mem otherCellsDownPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[3])); cl_mem otherCellsDownPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[3]));
OpenCLKernelClass* kernel = worker->get<ThisClass>(FSTARPU_OPENCL_IDX)->kernels[starpu_worker_get_id()]; OpenCLKernelClass* kernel = worker->get<ThisClass>(FSTARPU_OPENCL_IDX)->kernels[getWorkerId()];
kernel->downardPassPerform(currentCellsPtr, currentCellsSize, currentCellsDownPtr, kernel->downardPassPerform(currentCellsPtr, currentCellsSize, currentCellsDownPtr,
otherCellsPtr, otherCellsSize, otherCellsDownPtr, idxLevel, otherCellsPtr, otherCellsSize, otherCellsDownPtr, idxLevel,
intervalSize); intervalSize);
...@@ -251,7 +255,7 @@ public: ...@@ -251,7 +255,7 @@ public:
int intervalSize; int intervalSize;
starpu_codelet_unpack_args(cl_arg, &worker, &outsideInteractions, &intervalSize); starpu_codelet_unpack_args(cl_arg, &worker, &outsideInteractions, &intervalSize);
OpenCLKernelClass* kernel = worker->get<ThisClass>(FSTARPU_OPENCL_IDX)->kernels[starpu_worker_get_id()]; OpenCLKernelClass* kernel = worker->get<ThisClass>(FSTARPU_OPENCL_IDX)->kernels[getWorkerId()];
cl_int errcode_ret; cl_int errcode_ret;
cl_mem outsideInteractionsCl = clCreateBuffer(kernel->getOpenCLContext(), cl_mem outsideInteractionsCl = clCreateBuffer(kernel->getOpenCLContext(),
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR | CL_MEM_HOST_NO_ACCESS, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR | CL_MEM_HOST_NO_ACCESS,
...@@ -278,7 +282,7 @@ public: ...@@ -278,7 +282,7 @@ public:
FStarPUPtrInterface* worker = nullptr; FStarPUPtrInterface* worker = nullptr;
int intervalSize; int intervalSize;
starpu_codelet_unpack_args(cl_arg, &worker, &intervalSize); starpu_codelet_unpack_args(cl_arg, &worker, &intervalSize);
OpenCLKernelClass* kernel = worker->get<ThisClass>(FSTARPU_OPENCL_IDX)->kernels[starpu_worker_get_id()]; OpenCLKernelClass* kernel = worker->get<ThisClass>(FSTARPU_OPENCL_IDX)->kernels[getWorkerId()];
kernel->directInPassPerform(containersPtr, containerSize, containersDownPtr, kernel->directInPassPerform(containersPtr, containerSize, containersDownPtr,
intervalSize); intervalSize);
} }
...@@ -297,7 +301,7 @@ public: ...@@ -297,7 +301,7 @@ public:
int intervalSize; int intervalSize;
starpu_codelet_unpack_args(cl_arg, &worker, &outsideInteractions, &intervalSize); starpu_codelet_unpack_args(cl_arg, &worker, &outsideInteractions, &intervalSize);
OpenCLKernelClass* kernel = worker->get<ThisClass>(FSTARPU_OPENCL_IDX)->kernels[starpu_worker_get_id()]; OpenCLKernelClass* kernel = worker->get<ThisClass>(FSTARPU_OPENCL_IDX)->kernels[getWorkerId()];
cl_int errcode_ret; cl_int errcode_ret;
cl_mem outsideInteractionsCl = clCreateBuffer(kernel->getOpenCLContext(), cl_mem outsideInteractionsCl = clCreateBuffer(kernel->getOpenCLContext(),
CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR | CL_MEM_HOST_NO_ACCESS, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR | CL_MEM_HOST_NO_ACCESS,
...@@ -329,7 +333,7 @@ public: ...@@ -329,7 +333,7 @@ public:
FStarPUPtrInterface* worker = nullptr; FStarPUPtrInterface* worker = nullptr;
int intervalSize; int intervalSize;
starpu_codelet_unpack_args(cl_arg, &worker, &intervalSize); starpu_codelet_unpack_args(cl_arg, &worker, &intervalSize);
OpenCLKernelClass* kernel = worker->get<ThisClass>(FSTARPU_OPENCL_IDX)->kernels[starpu_worker_get_id()]; OpenCLKernelClass* kernel = worker->get<ThisClass>(FSTARPU_OPENCL_IDX)->kernels[getWorkerId()];
kernel->mergePassPerform(leafCellsPtr, leafCellsSize, leafCellsDownPtr, kernel->mergePassPerform(leafCellsPtr, leafCellsSize, leafCellsDownPtr,
containersPtr, containersSize, containersDownPtr, containersPtr, containersSize, containersDownPtr,
intervalSize); intervalSize);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment