Une MAJ de sécurité est nécessaire sur notre version actuelle. Elle sera effectuée lundi 02/08 entre 12h30 et 13h. L'interruption de service devrait durer quelques minutes (probablement moins de 5 minutes).

Commit fcb8e054 authored by BRAMAS Berenger's avatar BRAMAS Berenger
Browse files

update opencl starpu code

parent b7836e70
......@@ -121,7 +121,7 @@ public:
struct starpu_conf conf;
FAssertLF(starpu_conf_init(&conf) == 0);
conf.ncpus = MaxThreads;
// conf.ncpus = MaxThreads;
FAssertLF(starpu_init(&conf) == 0);
starpu_pthread_mutex_t initMutex;
......@@ -161,7 +161,14 @@ public:
initCodelet();
FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max Thread " << MaxThreads << ")\n");
FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max Worker " << starpu_worker_get_count() << ")\n");
FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max CPU " << starpu_cpu_worker_get_count() << ")\n");
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max OpenCL " << starpu_opencl_worker_get_count() << ")\n");
#endif
#ifdef ScalFMM_ENABLE_CUDA_KERNEL
FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max CUDA " << starpu_cuda_worker_get_count() << ")\n");
#endif
}
~FGroupTaskStarPUAlgorithm(){
......
......@@ -137,7 +137,7 @@ public:
struct starpu_conf conf;
FAssertLF(starpu_conf_init(&conf) == 0);
conf.ncpus = MaxThreads;
//conf.ncpus = MaxThreads;
FAssertLF(starpu_init(&conf) == 0);
FAssertLF(starpu_mpi_init ( 0, 0, 0 ) == 0);
......@@ -179,7 +179,14 @@ public:
initCodelet();
initCodeletMpi();
FLOG(FLog::Controller << "FGroupTaskStarPUMpiAlgorithm (Max Thread " << MaxThreads << ")\n");
// Report how many StarPU workers of each kind are available. The messages are
// tagged with this class' name (the MPI variant) so the log can be told apart
// from the one emitted by the non-MPI FGroupTaskStarPUAlgorithm.
FLOG(FLog::Controller << "FGroupTaskStarPUMpiAlgorithm (Max Worker " << starpu_worker_get_count() << ")\n");
FLOG(FLog::Controller << "FGroupTaskStarPUMpiAlgorithm (Max CPU " << starpu_cpu_worker_get_count() << ")\n");
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
FLOG(FLog::Controller << "FGroupTaskStarPUMpiAlgorithm (Max OpenCL " << starpu_opencl_worker_get_count() << ")\n");
#endif
#ifdef ScalFMM_ENABLE_CUDA_KERNEL
FLOG(FLog::Controller << "FGroupTaskStarPUMpiAlgorithm (Max CUDA " << starpu_cuda_worker_get_count() << ")\n");
#endif
}
~FGroupTaskStarPUMpiAlgorithm(){
......
......@@ -54,7 +54,7 @@ public:
/**
 * Create the OpenCL kernel wrapper for the given worker.
 *
 * The slot kernels[workerId] must still be empty (asserted). The wrapper is
 * built with the tree height and then initialised from the original kernel
 * through initDeviceFromKernel().
 *
 * @param workerId       index of the worker whose slot is filled
 * @param originalKernel kernel passed (dereferenced) to initDeviceFromKernel()
 */
void initKernel(const int workerId, KernelClass* originalKernel){
    FAssertLF(kernels[workerId] == nullptr);
    // Allocate exactly once: a preceding default-constructed allocation would
    // be overwritten by this one and leaked.
    kernels[workerId] = new OpenCLKernelClass(treeHeight);
    kernels[workerId]->initDeviceFromKernel(*originalKernel);
}
......
......@@ -475,7 +475,7 @@ __kernel void FOpenCL__bottomPassPerform(__global unsigned char* leafCellsPtr, s
__kernel void FOpenCL__upwardPassPerform(__global unsigned char* currentCellsPtr, size_t currentCellsSize,
struct Uptr9 subCellGroupsPtr, struct size_t9 subCellGroupsSize,
__global void* userkernel, int nbSubCellGroups, int idxLevel){
int nbSubCellGroups, int idxLevel, __global void* userkernel){
struct FOpenCLGroupOfCells currentCells = BuildFOpenCLGroupOfCells(currentCellsPtr, currentCellsSize);
struct FOpenCLGroupOfCells subCellGroups[9];
for(int idx = 0 ; idx < nbSubCellGroups ; ++idx){
......@@ -521,8 +521,8 @@ __kernel void FOpenCL__upwardPassPerform(__global unsigned char* currentCellsPtr
__kernel void FOpenCL__transferInoutPassPerformMpi(__global unsigned char* currentCellsPtr, size_t currentCellsSize,
__global unsigned char* externalCellsPtr, size_t externalCellsSize,
__global void* userkernel, int idxLevel, const __global OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions){
int idxLevel, const __global OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions, __global void* userkernel){
struct FOpenCLGroupOfCells currentCells = BuildFOpenCLGroupOfCells(currentCellsPtr, currentCellsSize);
struct FOpenCLGroupOfCells cellsOther = BuildFOpenCLGroupOfCells(externalCellsPtr, externalCellsSize);
......@@ -551,7 +551,7 @@ __kernel void FOpenCL__transferInoutPassPerformMpi(__global unsigned char* curr
__kernel void FOpenCL__transferInPassPerform(__global unsigned char* currentCellsPtr, size_t currentCellsSize,
__global void* userkernel, int idxLevel){
int idxLevel, __global void* userkernel){
struct FOpenCLGroupOfCells currentCells = BuildFOpenCLGroupOfCells(currentCellsPtr, currentCellsSize);
const MortonIndex blockStartIdx = FOpenCLGroupOfCells_getStartingIndex(&currentCells);
......@@ -591,8 +591,8 @@ __kernel void FOpenCL__transferInPassPerform(__global unsigned char* currentCel
__kernel void FOpenCL__transferInoutPassPerform(__global unsigned char* currentCellsPtr, size_t currentCellsSize,
__global unsigned char* externalCellsPtr, size_t externalCellsSize,
__global void* userkernel, int idxLevel, const __global OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions){
int idxLevel, const __global OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions, __global void* userkernel){
struct FOpenCLGroupOfCells currentCells = BuildFOpenCLGroupOfCells(currentCellsPtr, currentCellsSize);
struct FOpenCLGroupOfCells cellsOther = BuildFOpenCLGroupOfCells(externalCellsPtr, externalCellsSize);
......@@ -626,7 +626,7 @@ __kernel void FOpenCL__transferInoutPassPerform(__global unsigned char* currentC
__kernel void FOpenCL__downardPassPerform(__global unsigned char* currentCellsPtr, size_t currentCellsSize,
struct Uptr9 subCellGroupsPtr, struct size_t9 subCellGroupsSize,
__global void* userkernel, int nbSubCellGroups, int idxLevel){
int nbSubCellGroups, int idxLevel, __global void* userkernel){
FOpenCLAssertLF(nbSubCellGroups != 0);
struct FOpenCLGroupOfCells currentCells = BuildFOpenCLGroupOfCells(currentCellsPtr, currentCellsSize);
struct FOpenCLGroupOfCells subCellGroups[9];
......@@ -672,8 +672,8 @@ __kernel void FOpenCL__downardPassPerform(__global unsigned char* currentCellsPt
__kernel void FOpenCL__directInoutPassPerformMpi(__global unsigned char* containersPtr, size_t containersSize,
__global unsigned char* externalContainersPtr, size_t externalContainersSize,
__global void* userkernel, const __global OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions, const int treeHeight){
const __global OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions, const int treeHeight, __global void* userkernel){
struct FOpenCLGroupOfParticles containers = BuildFOpenCLGroupOfParticles(containersPtr, containersSize);
struct FOpenCLGroupOfParticles containersOther = BuildFOpenCLGroupOfParticles(externalContainersPtr, externalContainersSize);
......@@ -696,7 +696,7 @@ __kernel void FOpenCL__directInoutPassPerformMpi(__global unsigned char* contain
__kernel void FOpenCL__directInPassPerform(__global unsigned char* containersPtr, size_t containersSize,
__global void* userkernel, const int treeHeight){
const int treeHeight, __global void* userkernel){
struct FOpenCLGroupOfParticles containers = BuildFOpenCLGroupOfParticles(containersPtr, containersSize);
const MortonIndex blockStartIdx = FOpenCLGroupOfParticles_getStartingIndex(&containers);
......@@ -733,8 +733,8 @@ __kernel void FOpenCL__directInPassPerform(__global unsigned char* containersPtr
__kernel void FOpenCL__directInoutPassPerform(__global unsigned char* containersPtr, size_t containersSize,
__global unsigned char* externalContainersPtr, size_t externalContainersSize,
__global void* userkernel, const __global OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions, const int treeHeight){
const __global OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions, const int treeHeight, __global void* userkernel){
struct FOpenCLGroupOfParticles containers = BuildFOpenCLGroupOfParticles(containersPtr, containersSize);
struct FOpenCLGroupOfParticles containersOther = BuildFOpenCLGroupOfParticles(externalContainersPtr, externalContainersSize);
......
......@@ -70,8 +70,11 @@ protected:
cl_kernel kernel_mergePassPerform;
cl_command_queue queue_mergePassPerform;
cl_mem user_data;
int treeHeight;
public:
FOpenCLDeviceWrapper() : workerId(0) , workerDevid(0){
FOpenCLDeviceWrapper(const int inTreeHeight) : workerId(0) , workerDevid(0), user_data(0), treeHeight(inTreeHeight){
workerId = starpu_worker_get_id();
workerDevid = starpu_worker_get_devid(workerId);
......@@ -83,16 +86,16 @@ public:
const int err = starpu_opencl_load_opencl_from_file(filename, &opencl_code, NULL);
if(err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
FAssertLF( starpu_opencl_load_kernel(&kernel_bottomPassPerform, &queue_bottomPassPerform, &opencl_code, "bottomPassPerform", workerDevid) == CL_SUCCESS);
FAssertLF( starpu_opencl_load_kernel(&kernel_upwardPassPerform, &queue_upwardPassPerform, &opencl_code, "upwardPassPerform", workerDevid) == CL_SUCCESS);
FAssertLF( starpu_opencl_load_kernel(&kernel_transferInoutPassPerformMpi, &queue_transferInoutPassPerformMpi, &opencl_code, "transferInoutPassPerformMpi", workerDevid) == CL_SUCCESS);
FAssertLF( starpu_opencl_load_kernel(&kernel_transferInPassPerform, &queue_transferInPassPerform, &opencl_code, "transferInPassPerform", workerDevid) == CL_SUCCESS);
FAssertLF( starpu_opencl_load_kernel(&kernel_transferInoutPassPerform, &queue_transferInoutPassPerform, &opencl_code, "transferInoutPassPerform", workerDevid) == CL_SUCCESS);
FAssertLF( starpu_opencl_load_kernel(&kernel_downardPassPerform, &queue_downardPassPerform, &opencl_code, "downardPassPerform", workerDevid) == CL_SUCCESS);
FAssertLF( starpu_opencl_load_kernel(&kernel_directInoutPassPerformMpi, &queue_directInoutPassPerformMpi, &opencl_code, "directInoutPassPerformMpi", workerDevid) == CL_SUCCESS);
FAssertLF( starpu_opencl_load_kernel(&kernel_directInoutPassPerform, &queue_directInoutPassPerform, &opencl_code, "directInoutPassPerform", workerDevid) == CL_SUCCESS);
FAssertLF( starpu_opencl_load_kernel(&kernel_directInPassPerform, &queue_directInPassPerform, &opencl_code, "directInPassPerform", workerDevid) == CL_SUCCESS);
FAssertLF( starpu_opencl_load_kernel(&kernel_mergePassPerform, &queue_mergePassPerform, &opencl_code, "mergePassPerform", workerDevid) == CL_SUCCESS);
// Load one kernel handle and command queue per pass. Each string must be the
// name of the matching __kernel function in the OpenCL source (they carry the
// "FOpenCL__" prefix), and each handle must be bound to the kernel of the same
// name: the host wrappers set a different number of arguments per pass, so a
// handle bound to the wrong kernel receives the wrong argument list.
FAssertLF( starpu_opencl_load_kernel(&kernel_bottomPassPerform, &queue_bottomPassPerform, &opencl_code, "FOpenCL__bottomPassPerform", workerDevid) == CL_SUCCESS);
FAssertLF( starpu_opencl_load_kernel(&kernel_upwardPassPerform, &queue_upwardPassPerform, &opencl_code, "FOpenCL__upwardPassPerform", workerDevid) == CL_SUCCESS);
FAssertLF( starpu_opencl_load_kernel(&kernel_transferInoutPassPerformMpi, &queue_transferInoutPassPerformMpi, &opencl_code, "FOpenCL__transferInoutPassPerformMpi", workerDevid) == CL_SUCCESS);
FAssertLF( starpu_opencl_load_kernel(&kernel_transferInPassPerform, &queue_transferInPassPerform, &opencl_code, "FOpenCL__transferInPassPerform", workerDevid) == CL_SUCCESS);
FAssertLF( starpu_opencl_load_kernel(&kernel_transferInoutPassPerform, &queue_transferInoutPassPerform, &opencl_code, "FOpenCL__transferInoutPassPerform", workerDevid) == CL_SUCCESS);
FAssertLF( starpu_opencl_load_kernel(&kernel_downardPassPerform, &queue_downardPassPerform, &opencl_code, "FOpenCL__downardPassPerform", workerDevid) == CL_SUCCESS);
FAssertLF( starpu_opencl_load_kernel(&kernel_directInoutPassPerformMpi, &queue_directInoutPassPerformMpi, &opencl_code, "FOpenCL__directInoutPassPerformMpi", workerDevid) == CL_SUCCESS);
FAssertLF( starpu_opencl_load_kernel(&kernel_directInoutPassPerform, &queue_directInoutPassPerform, &opencl_code, "FOpenCL__directInoutPassPerform", workerDevid) == CL_SUCCESS);
FAssertLF( starpu_opencl_load_kernel(&kernel_directInPassPerform, &queue_directInPassPerform, &opencl_code, "FOpenCL__directInPassPerform", workerDevid) == CL_SUCCESS);
FAssertLF( starpu_opencl_load_kernel(&kernel_mergePassPerform, &queue_mergePassPerform, &opencl_code, "FOpenCL__mergePassPerform", workerDevid) == CL_SUCCESS);
}
}
......@@ -118,7 +121,7 @@ public:
}
void bottomPassPerform(cl_mem leafCellsPtr, size_t leafCellsSize, cl_mem containersPtr, size_t containersSize){
SetKernelArgs(kernel_bottomPassPerform, 0, &leafCellsPtr, &leafCellsSize, &containersPtr, &containersSize);
SetKernelArgs(kernel_bottomPassPerform, 0, &leafCellsPtr, &leafCellsSize, &containersPtr, &containersSize, &user_data);
size_t dim = 1;
const int err = clEnqueueNDRangeKernel(queue_bottomPassPerform, kernel_bottomPassPerform, 1, NULL, &dim, NULL, 0, NULL, NULL);
if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
......@@ -126,7 +129,7 @@ public:
void upwardPassPerform(cl_mem currentCellsPtr, size_t currentCellsSize, cl_mem subCellGroupsPtr[9], size_t subCellGroupsSize[9], int nbSubCellGroups, int idxLevel){
SetKernelArgs(kernel_upwardPassPerform, 0, &currentCellsPtr, &currentCellsSize, &subCellGroupsPtr, &subCellGroupsSize, &nbSubCellGroups, &idxLevel);
SetKernelArgs(kernel_upwardPassPerform, 0, &currentCellsPtr, &currentCellsSize, &subCellGroupsPtr, &subCellGroupsSize, &nbSubCellGroups, &idxLevel, &user_data);
size_t dim = 1;
const int err = clEnqueueNDRangeKernel(queue_upwardPassPerform, kernel_upwardPassPerform, 1, NULL, &dim, NULL, 0, NULL, NULL);
if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
......@@ -136,14 +139,14 @@ public:
size_t currentCellsSize, cl_mem externalCellsPtr, size_t externalCellsSize, int idxLevel, cl_mem outsideInteractionsCl,
size_t outsideInteractionsSize){
SetKernelArgs(kernel_transferInoutPassPerformMpi, 0, &currentCellsPtr,&currentCellsSize, &externalCellsPtr, &externalCellsSize, &idxLevel, &outsideInteractionsCl,
&outsideInteractionsSize);
&outsideInteractionsSize, &user_data);
size_t dim = 1;
const int err = clEnqueueNDRangeKernel(queue_transferInoutPassPerformMpi, kernel_transferInoutPassPerformMpi, 1, NULL, &dim, NULL, 0, NULL, NULL);
if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
}
void transferInPassPerform(cl_mem currentCellsPtr, size_t currentCellsSize, int idxLevel){
SetKernelArgs(kernel_transferInPassPerform, 0, &currentCellsPtr, &currentCellsSize, &idxLevel);
SetKernelArgs(kernel_transferInPassPerform, 0, &currentCellsPtr, &currentCellsSize, &idxLevel, &user_data);
size_t dim = 1;
const int err = clEnqueueNDRangeKernel(queue_transferInPassPerform, kernel_transferInPassPerform, 1, NULL, &dim, NULL, 0, NULL, NULL);
if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
......@@ -152,7 +155,8 @@ public:
void transferInoutPassPerform(cl_mem currentCellsPtr,
size_t currentCellsSize, cl_mem externalCellsPtr, size_t externalCellsSize, int idxLevel, cl_mem outsideInteractionsCl,
size_t outsideInteractionsSize){
SetKernelArgs(kernel_transferInoutPassPerform, 0, &currentCellsPtr,&currentCellsSize, &externalCellsPtr, &externalCellsSize, &idxLevel, &outsideInteractionsCl,&outsideInteractionsSize);
SetKernelArgs(kernel_transferInoutPassPerform, 0, &currentCellsPtr,&currentCellsSize, &externalCellsPtr, &externalCellsSize, &idxLevel,
&outsideInteractionsCl,&outsideInteractionsSize, &user_data);
size_t dim = 1;
const int err = clEnqueueNDRangeKernel(queue_transferInoutPassPerform, kernel_transferInoutPassPerform, 1, NULL, &dim, NULL, 0, NULL, NULL);
if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
......@@ -161,7 +165,7 @@ public:
void downardPassPerform(cl_mem currentCellsPtr,
size_t currentCellsSize, cl_mem subCellGroupsPtr[9], size_t subCellGroupsSize[9], int nbSubCellGroups, int idxLevel){
SetKernelArgs(kernel_downardPassPerform, 0, &currentCellsPtr,
&currentCellsSize, &subCellGroupsPtr, &subCellGroupsSize, &nbSubCellGroups, &idxLevel);
&currentCellsSize, &subCellGroupsPtr, &subCellGroupsSize, &nbSubCellGroups, &idxLevel, &user_data);
size_t dim = 1;
const int err = clEnqueueNDRangeKernel(queue_downardPassPerform, kernel_downardPassPerform, 1, NULL, &dim, NULL, 0, NULL, NULL);
if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
......@@ -171,14 +175,14 @@ public:
size_t containersSize, cl_mem externalContainersPtr, size_t externalContainersSize, cl_mem outsideInteractionsCl,
size_t outsideInteractionsSize){
SetKernelArgs(kernel_directInoutPassPerformMpi, 0, &containersPtr,
&containersSize, &externalContainersPtr, &externalContainersSize, &outsideInteractionsCl,&outsideInteractionsSize);
&containersSize, &externalContainersPtr, &externalContainersSize, &outsideInteractionsCl,&outsideInteractionsSize, &treeHeight, &user_data);
size_t dim = 1;
const int err = clEnqueueNDRangeKernel(queue_directInoutPassPerformMpi, kernel_directInoutPassPerformMpi, 1, NULL, &dim, NULL, 0, NULL, NULL);
if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
}
void directInPassPerform(cl_mem containersPtr, size_t containerSize){
SetKernelArgs(kernel_directInPassPerform, 0, &containersPtr, &containerSize);
SetKernelArgs(kernel_directInPassPerform, 0, &containersPtr, &containerSize, &treeHeight, &user_data);
size_t dim = 1;
const int err = clEnqueueNDRangeKernel(queue_directInPassPerform, kernel_directInPassPerform, 1, NULL, &dim, NULL, 0, NULL, NULL);
if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
......@@ -188,7 +192,7 @@ public:
size_t containerSize, cl_mem externalContainersPtr, size_t externalContainersSize, cl_mem outsideInteractionsCl,
size_t outsideInteractionsSize){
SetKernelArgs(kernel_directInoutPassPerform, 0, &containersPtr,
&containerSize, &externalContainersPtr, &externalContainersSize, &outsideInteractionsCl, &outsideInteractionsSize);
&containerSize, &externalContainersPtr, &externalContainersSize, &outsideInteractionsCl, &outsideInteractionsSize, &treeHeight, &user_data);
size_t dim = 1;
const int err = clEnqueueNDRangeKernel(queue_directInoutPassPerform, kernel_directInoutPassPerform, 1, NULL, &dim, NULL, 0, NULL, NULL);
if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
......@@ -196,7 +200,7 @@ public:
void mergePassPerform(cl_mem leafCellsPtr,
size_t leafCellsSize, cl_mem containersPtr, size_t containersSize){
SetKernelArgs(kernel_mergePassPerform, 0, &leafCellsPtr, &leafCellsSize, &containersPtr, &containersSize);
SetKernelArgs(kernel_mergePassPerform, 0, &leafCellsPtr, &leafCellsSize, &containersPtr, &containersSize, &user_data);
size_t dim = 1;
const int err = clEnqueueNDRangeKernel(queue_mergePassPerform, kernel_mergePassPerform, 1, NULL, &dim, NULL, 0, NULL, NULL);
if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment