Commit d961fa1e authored by BRAMAS Berenger's avatar BRAMAS Berenger
Browse files

update opencl and update the its interface

parent cce925b8
......@@ -21,57 +21,52 @@ public:
MortonIndex insideIndex;\
int relativeOutPosition;\
int insideIdxInBlock;\
int outsideIdxInBlock;\
} __attribute__ ((aligned (DefaultStructAlign)));\
struct Uptr9{\
__global unsigned char* ptrs[9];\
} __attribute__ ((aligned (DefaultStructAlign)));\
struct size_t9{\
size_t v[9];\
}__attribute__ ((aligned (DefaultStructAlign)));\
__kernel void FOpenCL__bottomPassPerform(__global unsigned char* leafCellsPtr, size_t leafCellsSize,__global unsigned char* leafCellsUpPtr,\
__global unsigned char* containersPtr, size_t containersSize,\
__global void* userkernel ){\
__global unsigned char* containersPtr, size_t containersSize,\
__global void* userkernel ){\
}\
__kernel void FOpenCL__upwardPassPerform(__global unsigned char* currentCellsPtr, size_t currentCellsSize, __global unsigned char* currentCellsUpPtr,\
struct Uptr9 subCellGroupsPtr, struct size_t9 subCellGroupsSize, struct Uptr9 subCellGroupsUpPtr,\
int nbSubCellGroups, int idxLevel, __global void* userkernel){\
__global unsigned char* childCellsPtr, size_t childCellsSize, __global unsigned char* childCellsUpPtr,\
int idxLevel, __global void* userkernel){\
}\
__kernel void FOpenCL__transferInoutPassPerformMpi(__global unsigned char* currentCellsPtr, size_t currentCellsSize, __global unsigned char* currentCellsDownPtr,\
__global unsigned char* externalCellsPtr, size_t externalCellsSize, __global unsigned char* externalCellsUpPtr,\
int idxLevel, const __global struct OutOfBlockInteraction* outsideInteractions,\
size_t nbOutsideInteractions, __global void* userkernel){\
__global unsigned char* externalCellsPtr, size_t externalCellsSize, __global unsigned char* externalCellsUpPtr,\
int idxLevel, const __global struct OutOfBlockInteraction* outsideInteractions,\
size_t nbOutsideInteractions, __global void* userkernel){\
}\
__kernel void FOpenCL__transferInPassPerform(__global unsigned char* currentCellsPtr, size_t currentCellsSize,\
__global unsigned char* currentCellsUpPtr, __global unsigned char* currentCellsDownPtr,\
int idxLevel, __global void* userkernel){\
__global unsigned char* currentCellsUpPtr, __global unsigned char* currentCellsDownPtr,\
int idxLevel, __global void* userkernel){\
}\
__kernel void FOpenCL__transferInoutPassPerform(__global unsigned char* currentCellsPtr, size_t currentCellsSize,\
__global unsigned char* currentCellsUpPtr, __global unsigned char* currentCellsDownPtr,\
__global unsigned char* externalCellsPtr, size_t externalCellsSize,\
__global unsigned char* externalCellsUpPtr, __global unsigned char* externalCellsDownPtr,\
int idxLevel, const __global struct OutOfBlockInteraction* outsideInteractions,\
size_t nbOutsideInteractions, __global void* userkernel){\
__global unsigned char* currentCellsUpPtr,\
__global unsigned char* externalCellsPtr, size_t externalCellsSize,\
__global unsigned char* externalCellsDownPtr,\
int idxLevel, int mode, const __global struct OutOfBlockInteraction* outsideInteractions,\
size_t nbOutsideInteractions, __global void* userkernel){\
}\
__kernel void FOpenCL__downardPassPerform(__global unsigned char* currentCellsPtr, size_t currentCellsSize, __global unsigned char* currentCellsDownPtr,\
struct Uptr9 subCellGroupsPtr, struct size_t9 subCellGroupsSize, struct Uptr9 subCellGroupsDownPtr,\
int nbSubCellGroups, int idxLevel, __global void* userkernel){\
__global unsigned char* childCellsPtr, size_t childCellsSize, __global unsigned char* childCellsDownPtr,\
int idxLevel, __global void* userkernel){\
}\
__kernel void FOpenCL__directInoutPassPerformMpi(__global unsigned char* containersPtr, size_t containersSize, __global unsigned char* containersDownPtr,\
__global unsigned char* externalContainersPtr, size_t externalContainersSize, __global unsigned char* outsideInteractionsCl,\
const __global struct OutOfBlockInteraction* outsideInteractions,\
size_t nbOutsideInteractions, const int treeHeight, __global void* userkernel){\
__global unsigned char* externalContainersPtr, size_t externalContainersSize, __global unsigned char* outsideInteractionsCl,\
const __global struct OutOfBlockInteraction* outsideInteractions,\
size_t nbOutsideInteractions, const int treeHeight, __global void* userkernel){\
}\
__kernel void FOpenCL__directInPassPerform(__global unsigned char* containersPtr, size_t containersSize, __global unsigned char* containersDownPtr,\
const int treeHeight, __global void* userkernel){\
const int treeHeight, __global void* userkernel){\
}\
__kernel void FOpenCL__directInoutPassPerform(__global unsigned char* containersPtr, size_t containersSize, __global unsigned char* containersDownPtr,\
__global unsigned char* externalContainersPtr, size_t externalContainersSize, __global unsigned char* externalContainersDownPtr,\
const __global struct OutOfBlockInteraction* outsideInteractions,\
size_t nbOutsideInteractions, const int treeHeight, __global void* userkernel){\
__global unsigned char* externalContainersPtr, size_t externalContainersSize, __global unsigned char* externalContainersDownPtr,\
const __global struct OutOfBlockInteraction* outsideInteractions,\
size_t nbOutsideInteractions, const int treeHeight, __global void* userkernel){\
}\
__kernel void FOpenCL__mergePassPerform(__global unsigned char* leafCellsPtr, size_t leafCellsSize, __global unsigned char* leafCellsDownPtr,\
__global unsigned char* containersPtr, size_t containersSize, __global unsigned char* containersDownPtr,\
__global void* userkernel){\
__global unsigned char* containersPtr, size_t containersSize, __global unsigned char* containersDownPtr,\
__global void* userkernel){\
}";
return kernelcode;
}
......
......@@ -21,14 +21,6 @@
template <class OriginalKernelClass, class KernelFilenameClass = FEmptyOpenCLCode>
class FOpenCLDeviceWrapper {
protected:
struct alignas(FStarPUDefaultAlign::StructAlign) Uptr9{
cl_mem ptrs[9];
};
struct alignas(FStarPUDefaultAlign::StructAlign) size_t9{
size_t v[9];
};
static void SetKernelArgs(cl_kernel& /*kernel*/, const int /*pos*/){
}
template <class ParamClass, class... Args>
......@@ -168,17 +160,11 @@ public:
void upwardPassPerform(cl_mem currentCellsPtr, size_t currentCellsSize, cl_mem currentCellsUpPtr,
cl_mem subCellGroupsPtr[9], size_t subCellGroupsSize[9], cl_mem subCellGroupsUpPtr[9],
int nbSubCellGroups, int idxLevel, const int intervalSize){
Uptr9 ptrs;
memcpy(ptrs.ptrs, subCellGroupsPtr, sizeof(cl_mem)*9);
size_t9 sizes;
memcpy(sizes.v, subCellGroupsSize, sizeof(size_t)*9);
Uptr9 ptrsUp;
memcpy(ptrsUp.ptrs, subCellGroupsUpPtr, sizeof(cl_mem)*9);
cl_mem subCellGroupsPtr, size_t subCellGroupsSize, cl_mem subCellGroupsUpPtr,
int idxLevel, const int intervalSize){
SetKernelArgs(kernel_upwardPassPerform, 0, &currentCellsPtr, &currentCellsSize, &currentCellsUpPtr,
&ptrs, &sizes, &ptrsUp, &nbSubCellGroups, &idxLevel, &user_data);
&subCellGroupsPtr, &subCellGroupsSize, &subCellGroupsUpPtr, &idxLevel, &user_data);
const int err = clEnqueueNDRangeKernel(queue_upwardPassPerform, kernel_upwardPassPerform, kernelFilename.getNbDims(), NULL,
kernelFilename.getNbGroups(intervalSize), kernelFilename.getGroupSize(), 0, NULL, NULL);
if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
......@@ -205,29 +191,22 @@ public:
if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
}
void transferInoutPassPerform(cl_mem currentCellsPtr, size_t currentCellsSize, cl_mem currentCellsUpPtr, cl_mem currentCellsDownPtr,
cl_mem externalCellsPtr, size_t externalCellsSize, cl_mem externalCellsUpPtr, cl_mem externalCellsDownPtr,
int idxLevel, cl_mem outsideInteractionsCl, size_t outsideInteractionsSize, const int intervalSize){
SetKernelArgs(kernel_transferInoutPassPerform, 0, &currentCellsPtr,&currentCellsSize, &currentCellsUpPtr, &currentCellsDownPtr,
&externalCellsPtr, &externalCellsSize, &externalCellsUpPtr, &externalCellsDownPtr,
&idxLevel, &outsideInteractionsCl,&outsideInteractionsSize, &user_data);
void transferInoutPassPerform(cl_mem currentCellsPtr, size_t currentCellsSize, cl_mem currentCellsUpPtr,
cl_mem externalCellsPtr, size_t externalCellsSize, cl_mem externalCellsDownPtr,
int idxLevel, const int mode, cl_mem outsideInteractionsCl, size_t outsideInteractionsSize, const int intervalSize){
SetKernelArgs(kernel_transferInoutPassPerform, 0, &currentCellsPtr,&currentCellsSize, &currentCellsUpPtr,
&externalCellsPtr, &externalCellsSize, &externalCellsDownPtr,
&idxLevel, &mode, &outsideInteractionsCl,&outsideInteractionsSize, &user_data);
const int err = clEnqueueNDRangeKernel(queue_transferInoutPassPerform, kernel_transferInoutPassPerform, kernelFilename.getNbDims(), NULL,
kernelFilename.getNbGroups(intervalSize), kernelFilename.getGroupSize(), 0, NULL, NULL);
if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
}
void downardPassPerform(cl_mem currentCellsPtr, size_t currentCellsSize, cl_mem currentCellsDownPtr,
cl_mem subCellGroupsPtr[9], size_t subCellGroupsSize[9], cl_mem subCellGroupsDownPtr[9],
int nbSubCellGroups, int idxLevel, const int intervalSize){
Uptr9 ptrs;
memcpy(ptrs.ptrs, subCellGroupsPtr, sizeof(cl_mem)*9);
size_t9 sizes;
memcpy(sizes.v, subCellGroupsSize, sizeof(size_t)*9);
Uptr9 ptrsDown;
memcpy(ptrsDown.ptrs, subCellGroupsDownPtr, sizeof(cl_mem)*9);
cl_mem subCellGroupsPtr, size_t subCellGroupsSize, cl_mem subCellGroupsDownPtr,
int idxLevel, const int intervalSize){
SetKernelArgs(kernel_downardPassPerform, 0, &currentCellsPtr, &currentCellsSize, &currentCellsDownPtr,
&ptrs, &sizes, &ptrsDown, &nbSubCellGroups, &idxLevel, &user_data);
&subCellGroupsPtr, &subCellGroupsSize, &subCellGroupsDownPtr, &idxLevel, &user_data);
const int err = clEnqueueNDRangeKernel(queue_downardPassPerform, kernel_downardPassPerform, kernelFilename.getNbDims(), NULL,
kernelFilename.getNbGroups(intervalSize), kernelFilename.getGroupSize(), 0, NULL, NULL);
if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
......
......@@ -100,22 +100,13 @@ public:
int intervalSize;
starpu_codelet_unpack_args(cl_arg, &worker, &nbSubCellGroups, &idxLevel, &intervalSize);
cl_mem subCellGroupsPtr[9];
memset(subCellGroupsPtr, 0, 9*sizeof(cl_mem));
cl_mem subCellGroupsUpPtr[9];
memset(subCellGroupsUpPtr, 0, 9*sizeof(cl_mem));
size_t subCellGroupsSize[9];
memset(subCellGroupsSize, 0, 9*sizeof(size_t));
for(int idxSubGroup = 0; idxSubGroup < nbSubCellGroups ; ++idxSubGroup){
subCellGroupsPtr[idxSubGroup] = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[(idxSubGroup*2)+2]));
subCellGroupsSize[idxSubGroup] = (STARPU_VARIABLE_GET_ELEMSIZE(buffers[(idxSubGroup*2)+2]));
subCellGroupsUpPtr[idxSubGroup] = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[(idxSubGroup*2)+3]));
}
cl_mem otherCellsPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[2]));
size_t otherCellsSize = STARPU_VARIABLE_GET_ELEMSIZE(buffers[2]);
cl_mem otherCellsUpPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[3]));
OpenCLKernelClass* kernel = worker->get<ThisClass>(FSTARPU_OPENCL_IDX)->kernels[starpu_worker_get_id()];
kernel->upwardPassPerform(currentCellsPtr, currentCellsSize, currentCellsUpPtr,
subCellGroupsPtr, subCellGroupsSize, subCellGroupsUpPtr,
nbSubCellGroups, idxLevel,
otherCellsPtr, otherCellsSize, otherCellsUpPtr, idxLevel,
intervalSize);
}
......@@ -180,19 +171,18 @@ public:
static void transferInoutPassCallback(void *buffers[], void *cl_arg){
cl_mem currentCellsPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[0]));
size_t currentCellsSize = STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]);
cl_mem currentCellsUpPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[1]));
cl_mem currentCellsDownPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[2]));
cl_mem currentCellsDownPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[1]));
cl_mem externalCellsPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[3]));
size_t externalCellsSize = STARPU_VARIABLE_GET_ELEMSIZE(buffers[3]);
cl_mem externalCellsUpPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[4]));
cl_mem externalCellsDownPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[5]));
cl_mem externalCellsPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[2]));
size_t externalCellsSize = STARPU_VARIABLE_GET_ELEMSIZE(buffers[2]);
cl_mem externalCellsUpPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[3]));
FStarPUPtrInterface* worker = nullptr;
int idxLevel = 0;
const std::vector<OutOfBlockInteraction>* outsideInteractions;
int intervalSize;
starpu_codelet_unpack_args(cl_arg, &worker, &idxLevel, &outsideInteractions, &intervalSize);
int mode = 0;
starpu_codelet_unpack_args(cl_arg, &worker, &idxLevel, &outsideInteractions, &intervalSize, &mode);
OpenCLKernelClass* kernel = worker->get<ThisClass>(FSTARPU_OPENCL_IDX)->kernels[starpu_worker_get_id()];
cl_int errcode_ret;
......@@ -202,9 +192,9 @@ public:
const_cast<OutOfBlockInteraction*>(outsideInteractions->data()), &errcode_ret);
FAssertLF(outsideInteractionsCl && errcode_ret == CL_SUCCESS);
kernel->transferInoutPassPerform(currentCellsPtr, currentCellsSize, currentCellsUpPtr, currentCellsDownPtr,
externalCellsPtr, externalCellsSize, externalCellsUpPtr, externalCellsDownPtr,
idxLevel, outsideInteractionsCl, outsideInteractions->size(),
kernel->transferInoutPassPerform(currentCellsPtr, currentCellsSize, currentCellsDownPtr,
externalCellsPtr, externalCellsSize, externalCellsUpPtr,
idxLevel, mode, outsideInteractionsCl, outsideInteractions->size(),
intervalSize);
clReleaseMemObject(outsideInteractionsCl);
......@@ -225,22 +215,13 @@ public:
int intervalSize;
starpu_codelet_unpack_args(cl_arg, &worker, &nbSubCellGroups, &idxLevel, &intervalSize);
cl_mem subCellGroupsPtr[9];
memset(subCellGroupsPtr, 0, 9*sizeof(cl_mem));
cl_mem subCellGroupsDownPtr[9];
memset(subCellGroupsDownPtr, 0, 9*sizeof(cl_mem));
size_t subCellGroupsSize[9];
memset(subCellGroupsSize, 0, 9*sizeof(size_t));
for(int idxSubGroup = 0; idxSubGroup < nbSubCellGroups ; ++idxSubGroup){
subCellGroupsPtr[idxSubGroup] = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[(idxSubGroup*2)+2]));
subCellGroupsSize[idxSubGroup] = (STARPU_VARIABLE_GET_ELEMSIZE(buffers[(idxSubGroup*2)+2]));
subCellGroupsDownPtr[idxSubGroup] = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[(idxSubGroup*2)+3]));
}
cl_mem otherCellsPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[2]));
size_t otherCellsSize = STARPU_VARIABLE_GET_ELEMSIZE(buffers[2]);
cl_mem otherCellsDownPtr = ((cl_mem)STARPU_VARIABLE_GET_DEV_HANDLE(buffers[3]));
OpenCLKernelClass* kernel = worker->get<ThisClass>(FSTARPU_OPENCL_IDX)->kernels[starpu_worker_get_id()];
kernel->downardPassPerform(currentCellsPtr, currentCellsSize, currentCellsDownPtr,
subCellGroupsPtr, subCellGroupsSize, subCellGroupsDownPtr,
nbSubCellGroups, idxLevel,
otherCellsPtr, otherCellsSize, otherCellsDownPtr, idxLevel,
intervalSize);
}
......
......@@ -51,6 +51,7 @@ struct OutOfBlockInteraction{
MortonIndex insideIndex;
int relativeOutPosition;
int insideIdxInBlock;
int outsideIdxInBlock;
} __attribute__ ((aligned (DefaultStructAlign)));
#define Between(inValue, inMin, inMax) ( (inMin) <= (inValue) && (inValue) < (inMax) )
......@@ -560,12 +561,10 @@ void M2M(struct FWrappeCell pole, struct FWrappeCell child[8], const int level,
}
}
void M2L(struct FWrappeCell const pole, const struct FWrappeCell distantNeighbors[343],
const int size, const int level, __global void* user_data) {
for(int idxNeigh = 0 ; idxNeigh < 343 ; ++idxNeigh){
if(distantNeighbors[idxNeigh].symb){
*pole.down += *distantNeighbors[idxNeigh].up;
}
void M2L(struct FWrappeCell const pole, const struct FWrappeCell* distantNeighbors,
const int* relativePositions, const int size, const int level, __global void* user_data) {
for(int idxNeigh = 0 ; idxNeigh < size ; ++idxNeigh){
*pole.down += *distantNeighbors[idxNeigh].up;
}
}
......@@ -610,6 +609,15 @@ void P2PRemote(const int3 pos,
}
}
void P2POuter(const int3 pos,
struct FOpenCLGroupAttachedLeaf targets, const struct FOpenCLGroupAttachedLeaf sources,
struct FOpenCLGroupAttachedLeaf directNeighborsParticles, const int position, __global void* user_data){
__global long long* partdown = targets.attributes[0];
for(FSize idxPart = 0 ; idxPart < targets.nbParticles ; ++idxPart){
partdown[idxPart] += directNeighborsParticles.nbParticles;
}
}
int3 getCoordinate(const struct FWrappeCell cell) {
int3 coord;
coord.x = cell.symb->coordinates[0];
......@@ -659,49 +667,43 @@ __kernel void FOpenCL__bottomPassPerform(__global unsigned char* leafCellsPtr, s
/////////////////////////////////////////////////////////////////////////////////////
__kernel void FOpenCL__upwardPassPerform(__global unsigned char* currentCellsPtr, size_t currentCellsSize, __global unsigned char* currentCellsUpPtr,
struct Uptr9 subCellGroupsPtr, struct size_t9 subCellGroupsSize, struct Uptr9 subCellGroupsUpPtr,
int nbSubCellGroups, int idxLevel, __global void* userkernel){
__global unsigned char* childCellsPtr, size_t childCellsSize, __global unsigned char* childCellsUpPtr,
int idxLevel, __global void* userkernel){
struct FOpenCLGroupOfCells currentCells = BuildFOpenCLGroupOfCells(currentCellsPtr, currentCellsSize, currentCellsUpPtr, NULLPTR);
struct FOpenCLGroupOfCells subCellGroups[9];
for(int idx = 0 ; idx < nbSubCellGroups ; ++idx){
subCellGroups[idx] = BuildFOpenCLGroupOfCells(subCellGroupsPtr.ptrs[idx], subCellGroupsSize.v[idx], subCellGroupsUpPtr.ptrs[idx], NULLPTR);
}
FOpenCLAssertLF(nbSubCellGroups != 0);
const int nbCells = FOpenCLGroupOfCells_getNumberOfCellsInBlock(&currentCells);
int idxSubCellGroup = 0;
int idxChildCell = FOpenCLGroupOfCells_getFistChildIdx(&subCellGroups[0], FOpenCLGroupOfCells_getCellIndex(&currentCells, 0));
struct FOpenCLGroupOfCells childCells = BuildFOpenCLGroupOfCells(childCellsPtr, childCellsSize, childCellsUpPtr, NULLPTR);
const int childNbCells = FOpenCLGroupOfCells_getNumberOfCellsInBlock(&childCells);
for(int idxCell = 0 ; idxCell < nbCells ; ++idxCell){
struct FWrappeCell cell = FOpenCLGroupOfCells_getUpCell(&currentCells, idxCell);
FOpenCLAssertLF(cell.symb->mortonIndex == FOpenCLGroupOfCells_getCellMortonIndex(&currentCells, idxCell));
struct FWrappeCell child[8];
const MortonIndex firstParent = FOpenCLMax(FOpenCLGroupOfCells_getStartingIndex(&currentCells), FOpenCLGroupOfCells_getStartingIndex(&childCells)>>3);
const MortonIndex lastParent = FOpenCLMin(FOpenCLGroupOfCells_getEndingIndex(&currentCells)-1, (FOpenCLGroupOfCells_getEndingIndex(&childCells)-1)>>3);
FOpenCLAssertLF(idxSubCellGroup != nbSubCellGroups);
int idxParentCell = FOpenCLGroupOfCells_getCellIndex(&currentCells,firstParent);
int idxChildCell = FOpenCLGroupOfCells_getFistChildIdx(&childCells,firstParent);
while(true){
struct FWrappeCell cell = FOpenCLGroupOfCells_getUpCell(&currentCells, idxParentCell);
struct FWrappeCell child[8];
for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){
child[idxChild].symb = NULLPTR;
}
while(idxSubCellGroup != nbSubCellGroups
&& (FOpenCLGroupOfCells_getCellMortonIndex(&subCellGroups[idxSubCellGroup], idxChildCell)>>3) == cell.symb->mortonIndex){
const int idxChild = ((FOpenCLGroupOfCells_getCellMortonIndex(&subCellGroups[idxSubCellGroup], idxChildCell)) & 7);
child[idxChild] = FOpenCLGroupOfCells_getUpCell(&subCellGroups[idxSubCellGroup], idxChildCell);
do{
const int idxChild = ((FOpenCLGroupOfCells_getCellMortonIndex(&childCells,idxChildCell)) & 7);
child[idxChild] = FOpenCLGroupOfCells_getUpCell(&childCells, idxChildCell);
idxChildCell += 1;
}while(idxChildCell != childNbCells && cell.symb->mortonIndex == (FOpenCLGroupOfCells_getCellMortonIndex(&childCells, idxChildCell)>>3));
if(idxChildCell == FOpenCLGroupOfCells_getNumberOfCellsInBlock(&subCellGroups[idxSubCellGroup])){
idxChildCell = 0;
idxSubCellGroup += 1;
}
M2M(cell, child, idxLevel, userkernel);
if(FOpenCLGroupOfCells_getCellMortonIndex(&currentCells, idxParentCell) == lastParent){
break;
}
M2M(cell, child, idxLevel, userkernel);
idxParentCell += 1;
}
}
/////////////////////////////////////////////////////////////////////////////////////
/// Transfer Pass Mpi
/////////////////////////////////////////////////////////////////////////////////////
......@@ -723,11 +725,8 @@ __kernel void FOpenCL__transferInoutPassPerformMpi(__global unsigned char* curr
struct FWrappeCell cell = FOpenCLGroupOfCells_getDownCell(&currentCells, outsideInteractions[outInterIdx].insideIdxInBlock);
FOpenCLAssertLF(cell.symb->mortonIndex == outsideInteractions[outInterIdx].insideIndex);
struct FWrappeCell interactions[343];
FSetToNullptr343(interactions);
interactions[outsideInteractions[outInterIdx].relativeOutPosition] = interCell;
const int counter = 1;
M2L( cell , interactions, counter, idxLevel, userkernel);
M2L( cell , &interCell, &outsideInteractions[outInterIdx].relativeOutPosition,
1, idxLevel, userkernel);
}
}
}
......@@ -766,47 +765,49 @@ __kernel void FOpenCL__transferInPassPerform(__global unsigned char* currentCel
const int cellPos = FOpenCLGroupOfCells_getCellIndex(&currentCells, interactionsIndexes[idxInter]);
if(cellPos != -1){
struct FWrappeCell interCell = FOpenCLGroupOfCells_getUpCell(&currentCells, cellPos);
FOpenCLAssertLF(interCell.symb->mortonIndex == interactionsIndexes[idxInter]);
FOpenCLAssertLF(interactions[interactionsPosition[idxInter]].symb == NULLPTR);
interactions[interactionsPosition[idxInter]] = interCell;
interactions[counterExistingCell] = interCell;
interactionsPosition[counterExistingCell] = interactionsPosition[idxInter];
counterExistingCell += 1;
}
}
}
M2L( cell , interactions, counterExistingCell, idxLevel, userkernel);
M2L( cell , interactions, interactionsPosition,
counterExistingCell, idxLevel, userkernel);
}
}
__kernel void FOpenCL__transferInoutPassPerform(__global unsigned char* currentCellsPtr, size_t currentCellsSize,
__global unsigned char* currentCellsUpPtr, __global unsigned char* currentCellsDownPtr,
__global unsigned char* currentCellsUpPtr,
__global unsigned char* externalCellsPtr, size_t externalCellsSize,
__global unsigned char* externalCellsUpPtr, __global unsigned char* externalCellsDownPtr,
int idxLevel, const __global struct OutOfBlockInteraction* outsideInteractions,
__global unsigned char* externalCellsDownPtr,
int idxLevel, int mode, const __global struct OutOfBlockInteraction* outsideInteractions,
size_t nbOutsideInteractions, __global void* userkernel){
struct FOpenCLGroupOfCells currentCells = BuildFOpenCLGroupOfCells(currentCellsPtr, currentCellsSize, currentCellsUpPtr, currentCellsDownPtr);
struct FOpenCLGroupOfCells cellsOther = BuildFOpenCLGroupOfCells(externalCellsPtr, externalCellsSize, externalCellsUpPtr, externalCellsDownPtr);
struct FOpenCLGroupOfCells currentCells = BuildFOpenCLGroupOfCells(currentCellsPtr, currentCellsSize, currentCellsUpPtr, NULLPTR);
struct FOpenCLGroupOfCells cellsOther = BuildFOpenCLGroupOfCells(externalCellsPtr, externalCellsSize, NULLPTR, externalCellsDownPtr);
for(int outInterIdx = 0 ; outInterIdx < nbOutsideInteractions ; ++outInterIdx){
const int cellPos = FOpenCLGroupOfCells_getCellIndex(&cellsOther, outsideInteractions[outInterIdx].outIndex);
if(cellPos != -1){
FOpenCLAssertLF(outsideInteractions[outInterIdx].outIndex == FOpenCLGroupOfCells_getCellMortonIndex(&cellsOther, outsideInteractions[outInterIdx].outIndex));
struct FWrappeCell interCell = FOpenCLGroupOfCells_getUpCell(&cellsOther, cellPos);
if(mode == 1){
for(int outInterIdx = 0 ; outInterIdx < nbOutsideInteractions ; ++outInterIdx){
struct FWrappeCell interCell = FOpenCLGroupOfCells_getUpCell(&cellsOther, outsideInteractions[outInterIdx].outsideIdxInBlock);
FOpenCLAssertLF(interCell.symb->mortonIndex == outsideInteractions[outInterIdx].outIndex);
struct FWrappeCell cell = FOpenCLGroupOfCells_getDownCell(&currentCells, outsideInteractions[outInterIdx].insideIdxInBlock);
FOpenCLAssertLF(cell.symb->mortonIndex == outsideInteractions[outInterIdx].insideIndex);
struct FWrappeCell interactions[343];
FSetToNullptr343(interactions);
interactions[outsideInteractions[outInterIdx].relativeOutPosition] = interCell;
const int counter = 1;
M2L( cell , interactions, counter, idxLevel, userkernel);
M2L( cell , &interCell, &outsideInteractions[outInterIdx].relativeOutPosition,
1, idxLevel, userkernel);
}
}
else{
for(int outInterIdx = 0 ; outInterIdx < nbOutsideInteractions ; ++outInterIdx){
struct FWrappeCell interCell = FOpenCLGroupOfCells_getDownCell(&cellsOther, outsideInteractions[outInterIdx].outsideIdxInBlock);
FOpenCLAssertLF(interCell.symb->mortonIndex == outsideInteractions[outInterIdx].outIndex);
struct FWrappeCell cell = FOpenCLGroupOfCells_getUpCell(&currentCells, outsideInteractions[outInterIdx].insideIdxInBlock);
FOpenCLAssertLF(cell.symb->mortonIndex == outsideInteractions[outInterIdx].insideIndex);
interactions[outsideInteractions[outInterIdx].relativeOutPosition].symb = NULLPTR;
interactions[FMGetOppositeInterIndex(outsideInteractions[outInterIdx].relativeOutPosition)] = cell;
M2L( interCell , interactions, counter, idxLevel, userkernel);
const int relativepos = FMGetOppositeInterIndex(outsideInteractions[outInterIdx].relativeOutPosition);
M2L( interCell , &cell, &relativepos, 1, idxLevel, userkernel);
}
}
}
......@@ -819,46 +820,39 @@ __kernel void FOpenCL__transferInoutPassPerform(__global unsigned char* currentC
__kernel void FOpenCL__downardPassPerform(__global unsigned char* currentCellsPtr, size_t currentCellsSize, __global unsigned char* currentCellsDownPtr,
struct Uptr9 subCellGroupsPtr, struct size_t9 subCellGroupsSize, struct Uptr9 subCellGroupsDownPtr,
int nbSubCellGroups, int idxLevel, __global void* userkernel){
FOpenCLAssertLF(nbSubCellGroups != 0);
__global unsigned char* childCellsPtr, size_t childCellsSize, __global unsigned char* childCellsDownPtr,
int idxLevel, __global void* userkernel){
struct FOpenCLGroupOfCells currentCells = BuildFOpenCLGroupOfCells(currentCellsPtr, currentCellsSize, NULLPTR, currentCellsDownPtr);
struct FOpenCLGroupOfCells subCellGroups[9];
for(int idx = 0 ; idx < nbSubCellGroups ; ++idx){
subCellGroups[idx] = BuildFOpenCLGroupOfCells(subCellGroupsPtr.ptrs[idx], subCellGroupsSize.v[idx], NULLPTR, subCellGroupsDownPtr.ptrs[idx]);
}
FOpenCLAssertLF(nbSubCellGroups != 0);
const int nbCells = FOpenCLGroupOfCells_getNumberOfCellsInBlock(&currentCells);
int idxSubCellGroup = 0;
int idxChildCell = FOpenCLGroupOfCells_getFistChildIdx(&subCellGroups[0], FOpenCLGroupOfCells_getCellIndex(&currentCells, 0));
struct FOpenCLGroupOfCells childCells = BuildFOpenCLGroupOfCells(childCellsPtr, childCellsSize, NULLPTR, childCellsDownPtr);
const int childNbCells = FOpenCLGroupOfCells_getNumberOfCellsInBlock(&childCells);
for(int idxCell = 0 ; idxCell < nbCells ; ++idxCell){
struct FWrappeCell cell = FOpenCLGroupOfCells_getDownCell(&currentCells, idxCell);
FOpenCLAssertLF(cell.symb->mortonIndex == FOpenCLGroupOfCells_getCellMortonIndex(&currentCells, idxCell));
struct FWrappeCell child[8];
const MortonIndex firstParent = FOpenCLMax(FOpenCLGroupOfCells_getStartingIndex(&currentCells), FOpenCLGroupOfCells_getStartingIndex(&childCells)>>3);
const MortonIndex lastParent = FOpenCLMin(FOpenCLGroupOfCells_getEndingIndex(&currentCells)-1, (FOpenCLGroupOfCells_getEndingIndex(&childCells)-1)>>3);
FOpenCLAssertLF(idxSubCellGroup != nbSubCellGroups);
int idxParentCell = FOpenCLGroupOfCells_getCellIndex(&currentCells,firstParent);
int idxChildCell = FOpenCLGroupOfCells_getFistChildIdx(&childCells,firstParent);
while(true){
struct FWrappeCell cell = FOpenCLGroupOfCells_getDownCell(&currentCells, idxParentCell);
struct FWrappeCell child[8];
for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){
child[idxChild].symb = NULLPTR;
}
while(idxSubCellGroup != nbSubCellGroups
&& (FOpenCLGroupOfCells_getCellMortonIndex(&subCellGroups[idxSubCellGroup], idxChildCell)>>3) == cell.symb->mortonIndex){
const int idxChild = ((FOpenCLGroupOfCells_getCellMortonIndex(&subCellGroups[idxSubCellGroup], idxChildCell)) & 7);
child[idxChild] = FOpenCLGroupOfCells_getDownCell(&subCellGroups[idxSubCellGroup], idxChildCell);
do{
const int idxChild = ((FOpenCLGroupOfCells_getCellMortonIndex(&childCells,idxChildCell)) & 7);
child[idxChild] = FOpenCLGroupOfCells_getDownCell(&childCells, idxChildCell);
idxChildCell += 1;
}while(idxChildCell != childNbCells && cell.symb->mortonIndex == (FOpenCLGroupOfCells_getCellMortonIndex(&childCells, idxChildCell)>>3));
if(idxChildCell == FOpenCLGroupOfCells_getNumberOfCellsInBlock(&subCellGroups[idxSubCellGroup])){
idxChildCell = 0;
idxSubCellGroup += 1;
}
L2L(cell, child, idxLevel, userkernel);
if(FOpenCLGroupOfCells_getCellMortonIndex(&currentCells, idxParentCell) == lastParent){
break;
}
L2L(cell, child, idxLevel, userkernel);
idxParentCell += 1;
}
}
......@@ -884,7 +878,8 @@ __kernel void FOpenCL__directInoutPassPerformMpi(__global unsigned char* contain
struct FOpenCLGroupAttachedLeaf particles = FOpenCLGroupOfParticles_getLeaf(&containers, outsideInteractions[outInterIdx].insideIdxInBlock);
FOpenCLAssertLF(FOpenCLGroupOfParticles_getLeafMortonIndex(&containers, outsideInteractions[outInterIdx].insideIdxInBlock) == outsideInteractions[outInterIdx].insideIndex);
P2PRemote( GetPositionFromMorton(outsideInteractions[outInterIdx].insideIndex, treeHeight-1), particles, particles , interParticles, outsideInteractions[outInterIdx].relativeOutPosition, userkernel);
P2PRemote( GetPositionFromMorton(outsideInteractions[outInterIdx].insideIndex, treeHeight-1), particles, particles ,
interParticles, outsideInteractions[outInterIdx].relativeOutPosition, userkernel);
}
}
}
......@@ -919,7 +914,7 @@ __kernel void FOpenCL__directInPassPerform(__global unsigned char* containersPtr
for(int idxInter = 0 ; idxInter < counter ; ++idxInter){
if( blockStartIdx <= interactionsIndexes[idxInter] && interactionsIndexes[idxInter] < blockEndIdx ){
const int leafPos = FOpenCLGroupOfParticles_getLeafIndex(&containers, interactionsIndexes[idxInter]);
if(leafPos){
if(leafPos != -1){
FOpenCLAssertLF(FOpenCLGroupOfParticles_getLeafMortonIndex(&containers, leafPos) == interactionsIndexes[idxInter]);
interactionsObjects[counterExistingCell] = FOpenCLGroupOfParticles_getLeaf(&containers, leafPos);
neighPosition[counterExistingCell] = interactionsPosition[idxInter];
......@@ -944,16 +939,18 @@ __kernel void FOpenCL__directInoutPassPerform(__global unsigned char* containers
for(int outInterIdx = 0 ; outInterIdx < nbOutsideInteractions ; ++outInterIdx){
const int leafPos = FOpenCLGroupOfParticles_getLeafIndex(&containersOther, outsideInteractions[outInterIdx].outIndex);
if(leafPos != -1){
FOpenCLAssertLF(FOpenCLGroupOfParticles_getLeafMortonIndex(&containersOther, leafPos) == outsideInteractions[outInterIdx].outIndex);
struct FOpenCLGroupAttachedLeaf interParticles = FOpenCLGroupOfParticles_getLeaf(&containersOther, leafPos);
struct FOpenCLGroupAttachedLeaf interParticles = FOpenCLGroupOfParticles_getLeaf(&containersOther, outsideInteractions[outInterIdx].outsideIdxInBlock);
struct FOpenCLGroupAttachedLeaf particles = FOpenCLGroupOfParticles_getLeaf(&containers, outsideInteractions[outInterIdx].insideIdxInBlock);
FOpenCLAssertLF(FOpenCLGroupOfParticles_getLeafMortonIndex(&containers, outsideInteractions[outInterIdx].insideIdxInBlock) == outsideInteractions[outInterIdx].insideIndex);
FOpenCLAssertLF(particles.nbParticles);
FOpenCLAssertLF(interParticles.nbParticles);
P2PRemote( GetPositionFromMorton(outsideInteractions[outInterIdx].insideIndex, treeHeight-1), particles, particles , interParticles, outsideInteractions[outInterIdx].relativeOutPosition, userkernel );
P2POuter( GetPositionFromMorton(outsideInteractions[outInterIdx].insideIndex, treeHeight-1), particles, particles ,
interParticles, outsideInteractions[outInterIdx].relativeOutPosition, userkernel );
P2PRemote( GetPositionFromMorton(outsideInteractions[outInterIdx].outIndex, treeHeight-1), interParticles, interParticles , particles, FMGetOppositeNeighIndex(outsideInteractions[outInterIdx].relativeOutPosition), userkernel);
P2POuter( GetPositionFromMorton(outsideInteractions[outInterIdx].outIndex, treeHeight-1), interParticles, interParticles ,
particles, FMGetOppositeNeighIndex(outsideInteractions[outInterIdx].relativeOutPosition), userkernel);
}
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment