Commit cb33f0fc authored by BRAMAS Berenger
Browse files

Make the OpenCL wrappers compile with the divided cell data (separate symbolic/up/down buffers); the .cl kernel code still needs to be updated.

parent 805fadda
......@@ -156,97 +156,111 @@ public:
return context;
}
// Launches the bottom-pass OpenCL kernel.
// Passes the addresses of the leaf-cell buffer and its size, the leaf-cell "up" buffer,
// the container buffer and its size, and user_data to SetKernelArgs for
// kernel_bottomPassPerform (second argument 0 — presumably the first argument index;
// SetKernelArgs is defined elsewhere), then enqueues the kernel on queue_bottomPassPerform.
// NOTE(review): this excerpt shows two signature/SetKernelArgs pairs; the first pair
// (without leafCellsUpPtr) looks like the pre-commit version kept by the diff view — confirm.
void bottomPassPerform(cl_mem leafCellsPtr, size_t leafCellsSize, cl_mem containersPtr, size_t containersSize){
SetKernelArgs(kernel_bottomPassPerform, 0, &leafCellsPtr, &leafCellsSize, &containersPtr, &containersSize, &user_data/*, &outputcl*/);
void bottomPassPerform(cl_mem leafCellsPtr, size_t leafCellsSize, cl_mem leafCellsUpPtr, cl_mem containersPtr, size_t containersSize){
SetKernelArgs(kernel_bottomPassPerform, 0, &leafCellsPtr, &leafCellsSize, &leafCellsUpPtr, &containersPtr, &containersSize, &user_data/*, &outputcl*/);
// Work dimensions, group count and group size all come from kernelFilename.
const int err = clEnqueueNDRangeKernel(queue_bottomPassPerform, kernel_bottomPassPerform, kernelFilename.getNbDims(), NULL,
kernelFilename.getNbGroups(), kernelFilename.getGroupSize(), 0, NULL, NULL);
// Any status other than CL_SUCCESS is handed to StarPU's OpenCL error reporter.
if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
}
// Launches the upward-pass OpenCL kernel for one level.
// The three 9-entry arrays (sub-group buffers, their sizes, sub-group "up" buffers) are
// copied into Uptr9 / size_t9 objects so each can be handed to SetKernelArgs by address
// as a single argument, alongside the current-cell buffers, nbSubCellGroups, idxLevel
// and user_data.
// NOTE(review): this excerpt shows two signatures and two SetKernelArgs calls; the first
// of each (without the *UpPtr parameters) looks like the pre-commit version kept by the
// diff view — confirm.
void upwardPassPerform(cl_mem currentCellsPtr, size_t currentCellsSize, cl_mem subCellGroupsPtr[9], size_t subCellGroupsSize[9], int nbSubCellGroups, int idxLevel){
void upwardPassPerform(cl_mem currentCellsPtr, size_t currentCellsSize, cl_mem currentCellsUpPtr,
cl_mem subCellGroupsPtr[9], size_t subCellGroupsSize[9], cl_mem subCellGroupsUpPtr[9],
int nbSubCellGroups, int idxLevel){
// Always copies all 9 slots, regardless of nbSubCellGroups.
Uptr9 ptrs;
memcpy(ptrs.ptrs, subCellGroupsPtr, sizeof(cl_mem)*9);
size_t9 sizes;
memcpy(sizes.v, subCellGroupsSize, sizeof(size_t)*9);
Uptr9 ptrsUp;
memcpy(ptrsUp.ptrs, subCellGroupsUpPtr, sizeof(cl_mem)*9);
SetKernelArgs(kernel_upwardPassPerform, 0, &currentCellsPtr, &currentCellsSize, &ptrs, &sizes, &nbSubCellGroups, &idxLevel, &user_data);
SetKernelArgs(kernel_upwardPassPerform, 0, &currentCellsPtr, &currentCellsSize, &currentCellsUpPtr,
&ptrs, &sizes, &ptrsUp, &nbSubCellGroups, &idxLevel, &user_data);
// Work dimensions, group count and group size all come from kernelFilename.
const int err = clEnqueueNDRangeKernel(queue_upwardPassPerform, kernel_upwardPassPerform, kernelFilename.getNbDims(), NULL,
kernelFilename.getNbGroups(), kernelFilename.getGroupSize(), 0, NULL, NULL);
// Any status other than CL_SUCCESS is handed to StarPU's OpenCL error reporter.
if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
}
// Launches the MPI variant of the transfer (in/out) OpenCL kernel for one level.
// Hands the current-cell buffer, its size and its "down" buffer, the external-cell
// buffer, its size and its "up" buffer, idxLevel, the outside-interactions buffer with
// its size, and user_data to SetKernelArgs for kernel_transferInoutPassPerformMpi, then
// enqueues the kernel on queue_transferInoutPassPerformMpi.
// NOTE(review): this excerpt shows two signature/SetKernelArgs pairs; the first pair
// (without currentCellsDownPtr / externalCellsUpPtr) looks like the pre-commit version
// kept by the diff view — confirm.
void transferInoutPassPerformMpi(cl_mem currentCellsPtr,
size_t currentCellsSize, cl_mem externalCellsPtr, size_t externalCellsSize, int idxLevel, cl_mem outsideInteractionsCl,
size_t outsideInteractionsSize){
SetKernelArgs(kernel_transferInoutPassPerformMpi, 0, &currentCellsPtr,&currentCellsSize, &externalCellsPtr, &externalCellsSize, &idxLevel, &outsideInteractionsCl,
&outsideInteractionsSize, &user_data);
void transferInoutPassPerformMpi(cl_mem currentCellsPtr, size_t currentCellsSize, cl_mem currentCellsDownPtr,
cl_mem externalCellsPtr, size_t externalCellsSize, cl_mem externalCellsUpPtr,
int idxLevel, cl_mem outsideInteractionsCl, size_t outsideInteractionsSize){
SetKernelArgs(kernel_transferInoutPassPerformMpi, 0, &currentCellsPtr,&currentCellsSize, &currentCellsDownPtr,
&externalCellsPtr, &externalCellsSize, &externalCellsUpPtr,
&idxLevel, &outsideInteractionsCl, &outsideInteractionsSize, &user_data);
// Work dimensions, group count and group size all come from kernelFilename.
const int err = clEnqueueNDRangeKernel(queue_transferInoutPassPerformMpi, kernel_transferInoutPassPerformMpi, kernelFilename.getNbDims(), NULL,
kernelFilename.getNbGroups(), kernelFilename.getGroupSize(), 0, NULL, NULL);
// Any status other than CL_SUCCESS is handed to StarPU's OpenCL error reporter.
if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
}
// Launches the transfer (in-group) OpenCL kernel for one level.
// Hands the current-cell buffer with its size, its "up" and "down" buffers, idxLevel and
// user_data to SetKernelArgs for kernel_transferInPassPerform, then enqueues the kernel
// on queue_transferInPassPerform.
// NOTE(review): this excerpt shows two signature/SetKernelArgs pairs; the first pair
// (without the Up/Down buffers) looks like the pre-commit version kept by the diff
// view — confirm.
void transferInPassPerform(cl_mem currentCellsPtr, size_t currentCellsSize, int idxLevel){
SetKernelArgs(kernel_transferInPassPerform, 0, &currentCellsPtr, &currentCellsSize, &idxLevel, &user_data);
void transferInPassPerform(cl_mem currentCellsPtr, size_t currentCellsSize,
cl_mem currentCellsUpPtr, cl_mem currentCellsDownPtr, int idxLevel){
SetKernelArgs(kernel_transferInPassPerform, 0, &currentCellsPtr, &currentCellsSize, &currentCellsUpPtr,
&currentCellsDownPtr, &idxLevel, &user_data);
// Work dimensions, group count and group size all come from kernelFilename.
const int err = clEnqueueNDRangeKernel(queue_transferInPassPerform, kernel_transferInPassPerform, kernelFilename.getNbDims(), NULL,
kernelFilename.getNbGroups(), kernelFilename.getGroupSize(), 0, NULL, NULL);
// Any status other than CL_SUCCESS is handed to StarPU's OpenCL error reporter.
if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
}
// Launches the transfer (in/out, between two cell groups) OpenCL kernel for one level.
// Hands the current-cell buffer with its size and Up/Down buffers, the external-cell
// buffer with its size and Up/Down buffers, idxLevel, the outside-interactions buffer
// with its size, and user_data to SetKernelArgs for kernel_transferInoutPassPerform,
// then enqueues the kernel on queue_transferInoutPassPerform.
// NOTE(review): this excerpt shows two signature/SetKernelArgs pairs; the first pair
// (without the Up/Down buffers) looks like the pre-commit version kept by the diff
// view — confirm.
void transferInoutPassPerform(cl_mem currentCellsPtr,
size_t currentCellsSize, cl_mem externalCellsPtr, size_t externalCellsSize, int idxLevel, cl_mem outsideInteractionsCl,
size_t outsideInteractionsSize){
SetKernelArgs(kernel_transferInoutPassPerform, 0, &currentCellsPtr,&currentCellsSize, &externalCellsPtr, &externalCellsSize, &idxLevel,
&outsideInteractionsCl,&outsideInteractionsSize, &user_data);
void transferInoutPassPerform(cl_mem currentCellsPtr, size_t currentCellsSize, cl_mem currentCellsUpPtr, cl_mem currentCellsDownPtr,
cl_mem externalCellsPtr, size_t externalCellsSize, cl_mem externalCellsUpPtr, cl_mem externalCellsDownPtr,
int idxLevel, cl_mem outsideInteractionsCl, size_t outsideInteractionsSize){
SetKernelArgs(kernel_transferInoutPassPerform, 0, &currentCellsPtr,&currentCellsSize, &currentCellsUpPtr, &currentCellsDownPtr,
&externalCellsPtr, &externalCellsSize, &externalCellsUpPtr, &externalCellsDownPtr,
&idxLevel, &outsideInteractionsCl,&outsideInteractionsSize, &user_data);
// Work dimensions, group count and group size all come from kernelFilename.
const int err = clEnqueueNDRangeKernel(queue_transferInoutPassPerform, kernel_transferInoutPassPerform, kernelFilename.getNbDims(), NULL,
kernelFilename.getNbGroups(), kernelFilename.getGroupSize(), 0, NULL, NULL);
// Any status other than CL_SUCCESS is handed to StarPU's OpenCL error reporter.
if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
}
// Launches the downward-pass OpenCL kernel for one level.
// The three 9-entry arrays (sub-group buffers, their sizes, sub-group "down" buffers)
// are copied into Uptr9 / size_t9 objects so each can be handed to SetKernelArgs by
// address as a single argument, alongside the current-cell buffers, nbSubCellGroups,
// idxLevel and user_data.
// NOTE(review): this excerpt shows two signatures and two SetKernelArgs calls; the first
// of each (without the *DownPtr parameters) looks like the pre-commit version kept by
// the diff view — confirm.
void downardPassPerform(cl_mem currentCellsPtr,
size_t currentCellsSize, cl_mem subCellGroupsPtr[9], size_t subCellGroupsSize[9], int nbSubCellGroups, int idxLevel){
void downardPassPerform(cl_mem currentCellsPtr, size_t currentCellsSize, cl_mem currentCellsDownPtr,
cl_mem subCellGroupsPtr[9], size_t subCellGroupsSize[9], cl_mem subCellGroupsDownPtr[9],
int nbSubCellGroups, int idxLevel){
// Always copies all 9 slots, regardless of nbSubCellGroups.
Uptr9 ptrs;
memcpy(ptrs.ptrs, subCellGroupsPtr, sizeof(cl_mem)*9);
size_t9 sizes;
memcpy(sizes.v, subCellGroupsSize, sizeof(size_t)*9);
Uptr9 ptrsDown;
memcpy(ptrsDown.ptrs, subCellGroupsDownPtr, sizeof(cl_mem)*9);
SetKernelArgs(kernel_downardPassPerform, 0, &currentCellsPtr,
&currentCellsSize, &ptrs, &sizes, &nbSubCellGroups, &idxLevel, &user_data);
SetKernelArgs(kernel_downardPassPerform, 0, &currentCellsPtr, &currentCellsSize, &currentCellsDownPtr,
&ptrs, &sizes, &ptrsDown, &nbSubCellGroups, &idxLevel, &user_data);
// Work dimensions, group count and group size all come from kernelFilename.
const int err = clEnqueueNDRangeKernel(queue_downardPassPerform, kernel_downardPassPerform, kernelFilename.getNbDims(), NULL,
kernelFilename.getNbGroups(), kernelFilename.getGroupSize(), 0, NULL, NULL);
// Any status other than CL_SUCCESS is handed to StarPU's OpenCL error reporter.
if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
}
// Launches the MPI variant of the direct (in/out) OpenCL kernel.
// Hands the container buffer with its size and "down" buffer, the external container
// buffer with its size, the outside-interactions buffer with its size, treeHeight and
// user_data to SetKernelArgs for kernel_directInoutPassPerformMpi, then enqueues the
// kernel on queue_directInoutPassPerformMpi.
// NOTE(review): this excerpt shows two signature/SetKernelArgs pairs; the first pair
// (without containersDownPtr) looks like the pre-commit version kept by the diff
// view — confirm.
void directInoutPassPerformMpi(cl_mem containersPtr,
size_t containersSize, cl_mem externalContainersPtr, size_t externalContainersSize, cl_mem outsideInteractionsCl,
size_t outsideInteractionsSize){
SetKernelArgs(kernel_directInoutPassPerformMpi, 0, &containersPtr,
&containersSize, &externalContainersPtr, &externalContainersSize, &outsideInteractionsCl,&outsideInteractionsSize, &treeHeight, &user_data);
void directInoutPassPerformMpi(cl_mem containersPtr, size_t containersSize, cl_mem containersDownPtr,
cl_mem externalContainersPtr, size_t externalContainersSize, cl_mem outsideInteractionsCl,
size_t outsideInteractionsSize){
SetKernelArgs(kernel_directInoutPassPerformMpi, 0, &containersPtr, &containersSize, &containersDownPtr,
&externalContainersPtr, &externalContainersSize, &outsideInteractionsCl,&outsideInteractionsSize, &treeHeight, &user_data);
// Work dimensions, group count and group size all come from kernelFilename.
const int err = clEnqueueNDRangeKernel(queue_directInoutPassPerformMpi, kernel_directInoutPassPerformMpi, kernelFilename.getNbDims(), NULL,
kernelFilename.getNbGroups(), kernelFilename.getGroupSize(), 0, NULL, NULL);
// Any status other than CL_SUCCESS is handed to StarPU's OpenCL error reporter.
if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
}
// Launches the direct (in-group) OpenCL kernel.
// Hands the container buffer with its size, its "down" buffer, treeHeight and user_data
// to SetKernelArgs for kernel_directInPassPerform, then enqueues the kernel on
// queue_directInPassPerform.
// NOTE(review): this excerpt shows two signature/SetKernelArgs pairs; the first pair
// (without containersDownPtr) looks like the pre-commit version kept by the diff
// view — confirm.
void directInPassPerform(cl_mem containersPtr, size_t containerSize){
SetKernelArgs(kernel_directInPassPerform, 0, &containersPtr, &containerSize, &treeHeight, &user_data);
void directInPassPerform(cl_mem containersPtr, size_t containerSize, cl_mem containersDownPtr){
SetKernelArgs(kernel_directInPassPerform, 0, &containersPtr, &containerSize, &containersDownPtr, &treeHeight, &user_data);
// Work dimensions, group count and group size all come from kernelFilename.
const int err = clEnqueueNDRangeKernel(queue_directInPassPerform, kernel_directInPassPerform, kernelFilename.getNbDims(), NULL,
kernelFilename.getNbGroups(), kernelFilename.getGroupSize(), 0, NULL, NULL);
// Any status other than CL_SUCCESS is handed to StarPU's OpenCL error reporter.
if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
}
// Launches the direct (in/out, between two particle groups) OpenCL kernel.
// Hands the container buffer with its size and "down" buffer, the external container
// buffer with its size and "down" buffer, the outside-interactions buffer with its size,
// treeHeight and user_data to SetKernelArgs for kernel_directInoutPassPerform, then
// enqueues the kernel on queue_directInoutPassPerform.
// NOTE(review): this excerpt shows two signature/SetKernelArgs pairs; the first pair
// (without the *DownPtr parameters) looks like the pre-commit version kept by the diff
// view — confirm.
void directInoutPassPerform(cl_mem containersPtr,
size_t containerSize, cl_mem externalContainersPtr, size_t externalContainersSize, cl_mem outsideInteractionsCl,
size_t outsideInteractionsSize){
SetKernelArgs(kernel_directInoutPassPerform, 0, &containersPtr,
&containerSize, &externalContainersPtr, &externalContainersSize, &outsideInteractionsCl, &outsideInteractionsSize, &treeHeight, &user_data);
void directInoutPassPerform(cl_mem containersPtr, size_t containerSize, cl_mem containersDownPtr,
cl_mem externalContainersPtr, size_t externalContainersSize, cl_mem externalContainersDownPtr,
cl_mem outsideInteractionsCl, size_t outsideInteractionsSize){
SetKernelArgs(kernel_directInoutPassPerform, 0, &containersPtr, &containerSize, &containersDownPtr,
&externalContainersPtr, &externalContainersSize, &externalContainersDownPtr,
&outsideInteractionsCl, &outsideInteractionsSize, &treeHeight, &user_data);
// Work dimensions, group count and group size all come from kernelFilename.
const int err = clEnqueueNDRangeKernel(queue_directInoutPassPerform, kernel_directInoutPassPerform, kernelFilename.getNbDims(), NULL,
kernelFilename.getNbGroups(), kernelFilename.getGroupSize(), 0, NULL, NULL);
// Any status other than CL_SUCCESS is handed to StarPU's OpenCL error reporter.
if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
}
void mergePassPerform(cl_mem leafCellsPtr,
size_t leafCellsSize, cl_mem containersPtr, size_t containersSize){
SetKernelArgs(kernel_mergePassPerform, 0, &leafCellsPtr, &leafCellsSize, &containersPtr, &containersSize, &user_data);
void mergePassPerform(cl_mem leafCellsPtr, size_t leafCellsSize, cl_mem leafCellsDownPtr,
cl_mem containersPtr, size_t containersSize, cl_mem containersDownPtr){
SetKernelArgs(kernel_mergePassPerform, 0, &leafCellsPtr, &leafCellsSize, &leafCellsDownPtr,
&containersPtr, &containersSize, &containersDownPtr, &user_data);
const int err = clEnqueueNDRangeKernel(queue_mergePassPerform, kernel_mergePassPerform, kernelFilename.getNbDims(), NULL,
kernelFilename.getNbGroups(), kernelFilename.getGroupSize(), 0, NULL, NULL);
if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
......
......@@ -73,14 +73,16 @@ public:
// StarPU OpenCL callback for the bottom pass.
// Reads the device handles (and element sizes) from the StarPU buffers, unpacks the
// worker interface from cl_arg, picks the OpenCL kernel object indexed by the current
// StarPU worker id and forwards everything to bottomPassPerform.
// New-version buffer layout as read here: [0] leaf cells, [1] leaf cells "up",
// [2] particle containers.
// NOTE(review): the containersPtr/containersSize lines reading buffers[1] and the
// 4-argument bottomPassPerform call look like the pre-commit version kept by the diff
// view — confirm.
static void bottomPassCallback(void *buffers[], void *cl_arg){
cl_mem leafCellsPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[0]));
size_t leafCellsSize = STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]);
cl_mem containersPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[1]));
size_t containersSize = STARPU_VARIABLE_GET_ELEMSIZE(buffers[1]);
cl_mem leafCellsUpPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[1]));
cl_mem containersPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[2]));
size_t containersSize = STARPU_VARIABLE_GET_ELEMSIZE(buffers[2]);
FStarPUPtrInterface* worker = nullptr;
starpu_codelet_unpack_args(cl_arg, &worker);
// One kernel object per StarPU worker, selected by the calling worker's id.
OpenCLKernelClass* kernel = worker->get<ThisClass>(FSTARPU_OPENCL_IDX)->kernels[starpu_worker_get_id()];
kernel->bottomPassPerform(leafCellsPtr, leafCellsSize, containersPtr, containersSize);
kernel->bottomPassPerform(leafCellsPtr, leafCellsSize, leafCellsUpPtr, containersPtr, containersSize);
}
/////////////////////////////////////////////////////////////////////////////////////
......@@ -90,6 +92,7 @@ public:
static void upwardPassCallback(void *buffers[], void *cl_arg){
cl_mem currentCellsPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[0]));
size_t currentCellsSize = STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]);
cl_mem currentCellsUpPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[1]));
FStarPUPtrInterface* worker = nullptr;
int nbSubCellGroups = 0;
......@@ -98,15 +101,20 @@ public:
cl_mem subCellGroupsPtr[9];
memset(subCellGroupsPtr, 0, 9*sizeof(cl_mem));
cl_mem subCellGroupsUpPtr[9];
memset(subCellGroupsUpPtr, 0, 9*sizeof(cl_mem));
size_t subCellGroupsSize[9];
memset(subCellGroupsSize, 0, 9*sizeof(size_t));
for(int idxSubGroup = 0; idxSubGroup < nbSubCellGroups ; ++idxSubGroup){
subCellGroupsPtr[idxSubGroup] = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[idxSubGroup+1]));
subCellGroupsSize[idxSubGroup] = (STARPU_VARIABLE_GET_ELEMSIZE(buffers[idxSubGroup+1]));
subCellGroupsPtr[idxSubGroup] = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[(idxSubGroup*2)+2]));
subCellGroupsSize[idxSubGroup] = (STARPU_VARIABLE_GET_ELEMSIZE(buffers[(idxSubGroup*2)+2]));
subCellGroupsUpPtr[idxSubGroup] = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[(idxSubGroup*2)+3]));
}
OpenCLKernelClass* kernel = worker->get<ThisClass>(FSTARPU_OPENCL_IDX)->kernels[starpu_worker_get_id()];
kernel->upwardPassPerform(currentCellsPtr, currentCellsSize, subCellGroupsPtr, subCellGroupsSize, nbSubCellGroups, idxLevel);
kernel->upwardPassPerform(currentCellsPtr, currentCellsSize, currentCellsUpPtr,
subCellGroupsPtr, subCellGroupsSize, subCellGroupsUpPtr,
nbSubCellGroups, idxLevel);
}
......@@ -117,8 +125,11 @@ public:
static void transferInoutPassCallbackMpi(void *buffers[], void *cl_arg){
cl_mem currentCellsPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[0]));
size_t currentCellsSize = STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]);
cl_mem externalCellsPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[1]));
size_t externalCellsSize = STARPU_VARIABLE_GET_ELEMSIZE(buffers[1]);
cl_mem currentCellsDownPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[1]));
cl_mem externalCellsPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[2]));
size_t externalCellsSize = STARPU_VARIABLE_GET_ELEMSIZE(buffers[2]);
cl_mem externalCellsUpPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[3]));
FStarPUPtrInterface* worker = nullptr;
int idxLevel = 0;
......@@ -134,8 +145,9 @@ public:
FAssertLF(outsideInteractionsCl && errcode_ret == CL_SUCCESS);
kernel->transferInoutPassPerformMpi(currentCellsPtr,
currentCellsSize, externalCellsPtr, externalCellsSize, idxLevel, outsideInteractionsCl,
outsideInteractions->size());
currentCellsSize, currentCellsDownPtr,
externalCellsPtr, externalCellsSize, externalCellsUpPtr,
idxLevel, outsideInteractionsCl, outsideInteractions->size());
clReleaseMemObject(outsideInteractionsCl);
}
......@@ -148,21 +160,27 @@ public:
// StarPU OpenCL callback for the in-group transfer pass.
// Reads the device handles from the StarPU buffers ([0] current cells and their element
// size, [1] current cells "up", [2] current cells "down"), unpacks the worker interface
// and idxLevel from cl_arg, picks the OpenCL kernel object indexed by the current StarPU
// worker id and forwards everything to transferInPassPerform.
// NOTE(review): the 3-argument transferInPassPerform call looks like the pre-commit
// version kept by the diff view — confirm.
static void transferInPassCallback(void *buffers[], void *cl_arg){
cl_mem currentCellsPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[0]));
size_t currentCellsSize = STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]);
cl_mem currentCellsUpPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[1]));
cl_mem currentCellsDownPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[2]));
FStarPUPtrInterface* worker = nullptr;
int idxLevel = 0;
starpu_codelet_unpack_args(cl_arg, &worker, &idxLevel);
// One kernel object per StarPU worker, selected by the calling worker's id.
OpenCLKernelClass* kernel = worker->get<ThisClass>(FSTARPU_OPENCL_IDX)->kernels[starpu_worker_get_id()];
kernel->transferInPassPerform(currentCellsPtr,
currentCellsSize, idxLevel);
kernel->transferInPassPerform(currentCellsPtr, currentCellsSize, currentCellsUpPtr, currentCellsDownPtr, idxLevel);
}
// StarPU OpenCL callback for the transfer (in/out) pass between two cell groups.
// This visible head of the function reads the device handles from the StarPU buffers;
// the remainder of the body lies beyond a diff hunk boundary.
// New-version buffer layout as read here: [0] current cells, [1] current cells "up",
// [2] current cells "down", [3] external cells, [4] external cells "up",
// [5] external cells "down".
// NOTE(review): the externalCellsPtr/externalCellsSize lines reading buffers[1] look like
// the pre-commit version kept by the diff view — confirm.
static void transferInoutPassCallback(void *buffers[], void *cl_arg){
cl_mem currentCellsPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[0]));
size_t currentCellsSize = STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]);
cl_mem externalCellsPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[1]));
size_t externalCellsSize = STARPU_VARIABLE_GET_ELEMSIZE(buffers[1]);
cl_mem currentCellsUpPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[1]));
cl_mem currentCellsDownPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[2]));
cl_mem externalCellsPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[3]));
size_t externalCellsSize = STARPU_VARIABLE_GET_ELEMSIZE(buffers[3]);
cl_mem externalCellsUpPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[4]));
// Fixed: this previously read buffers[4], aliasing the "up" handle above. The "down"
// buffer is the next slot in the sequence.
// NOTE(review): assumes the codelet registers six buffers — confirm at task insertion.
cl_mem externalCellsDownPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[5]));
FStarPUPtrInterface* worker = nullptr;
int idxLevel = 0;
......@@ -177,9 +195,9 @@ public:
(void*)outsideInteractions->data(), &errcode_ret);
FAssertLF(outsideInteractionsCl && errcode_ret == CL_SUCCESS);
kernel->transferInoutPassPerform(currentCellsPtr,
currentCellsSize, externalCellsPtr, externalCellsSize, idxLevel, outsideInteractionsCl,
outsideInteractions->size());
kernel->transferInoutPassPerform(currentCellsPtr, currentCellsSize, currentCellsUpPtr, currentCellsDownPtr,
externalCellsPtr, externalCellsSize, externalCellsUpPtr, externalCellsDownPtr,
idxLevel, outsideInteractionsCl, outsideInteractions->size());
clReleaseMemObject(outsideInteractionsCl);
}
......@@ -191,6 +209,7 @@ public:
// StarPU OpenCL callback for the downward pass.
// This visible head of the function reads the current-cell handles from the StarPU
// buffers; the remainder of the body lies beyond a diff hunk boundary.
static void downardPassCallback(void *buffers[], void *cl_arg){
cl_mem currentCellsPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[0]));
size_t currentCellsSize = STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]);
// Fixed: this previously read buffers[0], so the "down" handle aliased currentCellsPtr.
// The sub-group handles in this function start at buffers[2], which leaves buffers[1]
// for the current cells' "down" data (same layout as the "up" handle in
// upwardPassCallback).
cl_mem currentCellsDownPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[1]));
FStarPUPtrInterface* worker = nullptr;
int nbSubCellGroups = 0;
......@@ -199,16 +218,20 @@ public:
cl_mem subCellGroupsPtr[9];
memset(subCellGroupsPtr, 0, 9*sizeof(cl_mem));
cl_mem subCellGroupsDownPtr[9];
memset(subCellGroupsDownPtr, 0, 9*sizeof(cl_mem));
size_t subCellGroupsSize[9];
memset(subCellGroupsSize, 0, 9*sizeof(size_t));
for(int idxSubGroup = 0; idxSubGroup < nbSubCellGroups ; ++idxSubGroup){
subCellGroupsPtr[idxSubGroup] = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[idxSubGroup+1]));
subCellGroupsSize[idxSubGroup] = (STARPU_VARIABLE_GET_ELEMSIZE(buffers[idxSubGroup+1]));
subCellGroupsPtr[idxSubGroup] = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[(idxSubGroup*2)+2]));
subCellGroupsSize[idxSubGroup] = (STARPU_VARIABLE_GET_ELEMSIZE(buffers[(idxSubGroup*2)+2]));
subCellGroupsDownPtr[idxSubGroup] = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[(idxSubGroup*2)+3]));
}
OpenCLKernelClass* kernel = worker->get<ThisClass>(FSTARPU_OPENCL_IDX)->kernels[starpu_worker_get_id()];
kernel->downardPassPerform(currentCellsPtr,
currentCellsSize, subCellGroupsPtr, subCellGroupsSize, nbSubCellGroups, idxLevel);
kernel->downardPassPerform(currentCellsPtr, currentCellsSize, currentCellsDownPtr,
subCellGroupsPtr, subCellGroupsSize, subCellGroupsDownPtr,
nbSubCellGroups, idxLevel);
}
/////////////////////////////////////////////////////////////////////////////////////
......@@ -219,8 +242,10 @@ public:
static void directInoutPassCallbackMpi(void *buffers[], void *cl_arg){
cl_mem containersPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[0]));
size_t containersSize = STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]);
cl_mem externalContainersPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[1]));
size_t externalContainersSize = STARPU_VARIABLE_GET_ELEMSIZE(buffers[1]);
cl_mem containersDownPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[1]));
cl_mem externalContainersPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[2]));
size_t externalContainersSize = STARPU_VARIABLE_GET_ELEMSIZE(buffers[2]);
FStarPUPtrInterface* worker = nullptr;
const std::vector<OutOfBlockInteraction>* outsideInteractions = nullptr;
......@@ -234,9 +259,8 @@ public:
(void*)outsideInteractions->data(), &errcode_ret);
FAssertLF(outsideInteractionsCl && errcode_ret == CL_SUCCESS);
kernel->directInoutPassPerformMpi(containersPtr,
containersSize, externalContainersPtr, externalContainersSize, outsideInteractionsCl,
outsideInteractions->size());
kernel->directInoutPassPerformMpi(containersPtr, containersSize, containersDownPtr,
externalContainersPtr, externalContainersSize, outsideInteractionsCl, outsideInteractions->size());
clReleaseMemObject(outsideInteractionsCl);
}
......@@ -248,18 +272,22 @@ public:
// StarPU OpenCL callback for the in-group direct pass.
// Reads the device handles from the StarPU buffers ([0] particle containers and their
// element size, [1] containers "down"), unpacks the worker interface from cl_arg, picks
// the OpenCL kernel object indexed by the current StarPU worker id and forwards
// everything to directInPassPerform.
// NOTE(review): the 2-argument directInPassPerform call looks like the pre-commit
// version kept by the diff view — confirm.
static void directInPassCallback(void *buffers[], void *cl_arg){
cl_mem containersPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[0]));
size_t containerSize = STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]);
cl_mem containersDownPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[1]));
FStarPUPtrInterface* worker = nullptr;
starpu_codelet_unpack_args(cl_arg, &worker);
// One kernel object per StarPU worker, selected by the calling worker's id.
OpenCLKernelClass* kernel = worker->get<ThisClass>(FSTARPU_OPENCL_IDX)->kernels[starpu_worker_get_id()];
kernel->directInPassPerform(containersPtr, containerSize);
kernel->directInPassPerform(containersPtr, containerSize, containersDownPtr);
}
static void directInoutPassCallback(void *buffers[], void *cl_arg){
cl_mem containersPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[0]));
size_t containerSize = STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]);
cl_mem externalContainersPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[1]));
size_t externalContainersSize = STARPU_VARIABLE_GET_ELEMSIZE(buffers[1]);
cl_mem containersDownPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[1]));
cl_mem externalContainersPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[2]));
size_t externalContainersSize = STARPU_VARIABLE_GET_ELEMSIZE(buffers[2]);
cl_mem externalContainersDownPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[3]));
FStarPUPtrInterface* worker = nullptr;
const std::vector<OutOfBlockInteraction>* outsideInteractions = nullptr;
......@@ -273,9 +301,9 @@ public:
(void*)outsideInteractions->data(), &errcode_ret);
FAssertLF(outsideInteractionsCl && errcode_ret == CL_SUCCESS);
kernel->directInoutPassPerform(containersPtr,
containerSize, externalContainersPtr, externalContainersSize, outsideInteractionsCl,
outsideInteractions->size());
kernel->directInoutPassPerform(containersPtr, containerSize, containersDownPtr,
externalContainersPtr, externalContainersSize, externalContainersDownPtr,
outsideInteractionsCl, outsideInteractions->size());
clReleaseMemObject(outsideInteractionsCl);
}
......@@ -287,14 +315,17 @@ public:
// StarPU OpenCL callback for the merge pass.
// Reads the device handles (and element sizes) from the StarPU buffers, unpacks the
// worker interface from cl_arg, picks the OpenCL kernel object indexed by the current
// StarPU worker id and forwards everything to mergePassPerform.
// New-version buffer layout as read here: [0] leaf cells, [1] leaf cells "down",
// [2] particle containers, [3] containers "down".
// NOTE(review): the containersPtr/containersSize lines reading buffers[1] and the
// 4-argument mergePassPerform call look like the pre-commit version kept by the diff
// view — confirm.
static void mergePassCallback(void *buffers[], void *cl_arg){
cl_mem leafCellsPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[0]));
size_t leafCellsSize = STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]);
cl_mem containersPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[1]));
size_t containersSize = STARPU_VARIABLE_GET_ELEMSIZE(buffers[1]);
cl_mem leafCellsDownPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[1]));
cl_mem containersPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[2]));
size_t containersSize = STARPU_VARIABLE_GET_ELEMSIZE(buffers[2]);
cl_mem containersDownPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[3]));
FStarPUPtrInterface* worker = nullptr;
starpu_codelet_unpack_args(cl_arg, &worker);
// One kernel object per StarPU worker, selected by the calling worker's id.
OpenCLKernelClass* kernel = worker->get<ThisClass>(FSTARPU_OPENCL_IDX)->kernels[starpu_worker_get_id()];
kernel->mergePassPerform(leafCellsPtr,
leafCellsSize, containersPtr, containersSize);
kernel->mergePassPerform(leafCellsPtr, leafCellsSize, leafCellsDownPtr,
containersPtr, containersSize, containersDownPtr);
}
};
......
......@@ -28,6 +28,9 @@
#include "../../Src/Utils/FParameterNames.hpp"
#include "../../Src/GroupTree/StarPUUtils/FStarPUCpuWrapper.hpp"
#include "../../Src/GroupTree/StarPUUtils/FStarPUOpenClWrapper.hpp"
#include "../../Src/Components/FTestParticleContainer.hpp"
#include "../../Src/Components/FTestCell.hpp"
#include "../../Src/Components/FTestKernels.hpp"
......@@ -40,6 +43,7 @@
#include "../../Src/GroupTree/TestKernel/FTestOpenCLCode.hpp"
#include "../../Src/GroupTree/TestKernel/FTestCellPOD.hpp"
#include "../../Src/GroupTree/OpenCl/FOpenCLDeviceWrapper.hpp"
int main(int argc, char* argv[]){
setenv("STARPU_NCPU","0",1);
......@@ -59,15 +63,21 @@ int main(int argc, char* argv[]){
FParameterDefinitions::OctreeHeight, FParameterDefinitions::NbThreads,
FParameterDefinitions::NbParticles, LocalOptionBlocSize);
typedef FTestCellPOD GroupCellClass;
typedef FTestCellPODCore GroupCellSymbClass;
typedef FTestCellPODData GroupCellUpClass;
typedef FTestCellPODData GroupCellDownClass;
typedef FTestCellPOD GroupCellClass;
typedef FGroupTestParticleContainer GroupContainerClass;
typedef FGroupTree< GroupCellClass, GroupContainerClass, 2, long long int> GroupOctreeClass;
typedef FGroupTree< GroupCellClass, GroupCellSymbClass, GroupCellUpClass, GroupCellDownClass,
GroupContainerClass, 0, 1, long long int> GroupOctreeClass;
typedef FStarPUAllCpuOpenCLCapacities<FTestKernels< GroupCellClass, GroupContainerClass >> GroupKernelClass;
typedef FGroupTaskStarPUAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass
typedef FStarPUCpuWrapper<typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass> GroupCpuWrapper;
typedef FGroupTaskStarPUAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupCpuWrapper
#ifdef ScalFMM_ENABLE_CUDA_KERNEL
, FCudaGroupOfCells<0>, FCudaGroupOfParticles<0, int>, FCudaGroupAttachedLeaf<0, int>, FCudaEmptyKernel<>
, FStarPUCudaWrapper<KernelClass, FCudaEmptyCell, FCudaGroupOfCells<FCudaEmptyCell>, FCudaGroupOfParticles<0, int>, FCudaGroupAttachedLeaf<0, int>, FCudaEmptyKernel<>>
#endif
, FOpenCLDeviceWrapper<GroupKernelClass, FTestOpenCLCode>
, FStarPUOpenClWrapper<GroupKernelClass, FOpenCLDeviceWrapper<GroupKernelClass, FTestOpenCLCode> >
> GroupAlgorithm;
typedef FTestCell CellClass;
......@@ -116,14 +126,14 @@ int main(int argc, char* argv[]){
groupedTree.printInfoBlocks();
// Check tree structure at leaf level
groupedTree.forEachCellLeaf<FGroupTestParticleContainer>([&](GroupCellClass* gcell, FGroupTestParticleContainer* gleaf){
const ContainerClass* src = tree.getLeafSrc(gcell->getMortonIndex());
groupedTree.forEachCellLeaf<FGroupTestParticleContainer>([&](GroupCellClass gcell, FGroupTestParticleContainer* gleaf){
const ContainerClass* src = tree.getLeafSrc(gcell.getMortonIndex());
if(src == nullptr){
std::cout << "[PartEmpty] Error cell should not exist " << gcell->getMortonIndex() << "\n";
std::cout << "[PartEmpty] Error cell should not exist " << gcell.getMortonIndex() << "\n";
}
else {
if(src->getNbParticles() != gleaf->getNbParticles()){
std::cout << "[Part] Nb particles is different at index " << gcell->getMortonIndex() << " is " << gleaf->getNbParticles() << " should be " << src->getNbParticles() << "\n";
std::cout << "[Part] Nb particles is different at index " << gcell.getMortonIndex() << " is " << gleaf->getNbParticles() << " should be " << src->getNbParticles() << "\n";
}
}
});
......@@ -139,33 +149,33 @@ int main(int argc, char* argv[]){
algo.execute();
// Validate the result
groupedTree.forEachCellLeaf<FGroupTestParticleContainer>([&](GroupCellClass* cell, FGroupTestParticleContainer* leaf){
groupedTree.forEachCellLeaf<FGroupTestParticleContainer>([&](GroupCellClass cell, FGroupTestParticleContainer* leaf){
const int nbPartsInLeaf = leaf->getNbParticles();
if(cell->getDataUp() != nbPartsInLeaf){
std::cout << "[P2M] Error a Cell has " << cell->getDataUp() << " (it should be " << nbPartsInLeaf << ")\n";
if(cell.getDataUp() != nbPartsInLeaf){
std::cout << "[P2M] Error a Cell has " << cell.getDataUp() << " (it should be " << nbPartsInLeaf << ")\n";
}
});
groupedTree.forEachCellLeaf<FGroupTestParticleContainer>([&](GroupCellClass* cell, FGroupTestParticleContainer* leaf){
groupedTree.forEachCellLeaf<FGroupTestParticleContainer>([&](GroupCellClass cell, FGroupTestParticleContainer* leaf){
const int nbPartsInLeaf = leaf->getNbParticles();
const long long int* dataDown = leaf->getDataDown();
for(int idxPart = 0 ; idxPart < nbPartsInLeaf ; ++idxPart){
if(dataDown[idxPart] != loader.getNumberOfParticles()-1){
std::cout << "[Full] Error a particle has " << dataDown[idxPart] << " (it should be " << (loader.getNumberOfParticles()-1) << ") at index " << cell->getMortonIndex() << "\n";
std::cout << "[Full] Error a particle has " << dataDown[idxPart] << " (it should be " << (loader.getNumberOfParticles()-1) << ") at index " << cell.getMortonIndex() << "\n";
}
}
});
// Compare the results
groupedTree.forEachCellWithLevel([&](GroupCellClass* gcell, const int level){
const CellClass* cell = tree.getCell(gcell->getMortonIndex(), level);
groupedTree.forEachCellWithLevel([&](GroupCellClass gcell, const int level){
const CellClass* cell = tree.getCell(gcell.getMortonIndex(), level);
if(cell == nullptr){
std::cout << "[Empty] Error cell should not exist " << gcell->getMortonIndex() << "\n";
std::cout << "[Empty] Error cell should not exist " << gcell.getMortonIndex() << "\n";
}
else {
if(gcell->getDataUp() != cell->getDataUp()){
std::cout << "[Up] Up is different at index " << gcell->getMortonIndex() << " level " << level << " is " << gcell->getDataUp() << " should be " << cell->getDataUp() << "\n";
if(gcell.getDataUp() != cell->getDataUp()){
std::cout << "[Up] Up is different at index " << gcell.getMortonIndex() << " level " << level << " is " << gcell.getDataUp() << " should be " << cell->getDataUp() << "\n";
}
if(gcell->getDataDown() != cell->getDataDown()){
std::cout << "[Down] Down is different at index " << gcell->getMortonIndex() << " level " << level << " is " << gcell->getDataDown() << " should be " << cell->getDataDown() << "\n";
if(gcell.getDataDown() != cell->getDataDown()){
std::cout << "[Down] Down is different at index " << gcell.getMortonIndex() << " level " << level << " is " << gcell.getDataDown() << " should be " << cell->getDataDown() << "\n";
}
}
});
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment