Commit bf16a2d7 authored by BRAMAS Berenger

WIP for the starpu opencl

parent 444eaafe
......@@ -18,7 +18,7 @@
template <class CellClass>
class FGroupOfCells {
/** One header is allocated at the beginning of each block */
struct BlockHeader{
struct alignas(1) BlockHeader{
MortonIndex startingIndex;
MortonIndex endingIndex;
int numberOfCellsInBlock;
......
......@@ -19,7 +19,7 @@
template <unsigned NbAttributesPerParticle, class AttributeClass = FReal>
class FGroupOfParticles {
/** One header is allocated at the beginning of each block */
struct BlockHeader{
struct alignas(1) BlockHeader{
MortonIndex startingIndex;
MortonIndex endingIndex;
int numberOfLeavesInBlock;
......@@ -36,7 +36,7 @@ class FGroupOfParticles {
};
/** Information about a leaf */
struct LeafHeader {
struct alignas(1) LeafHeader {
int nbParticles;
size_t offSet;
};
......
......@@ -162,7 +162,9 @@ public:
initCodelet();
FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max Worker " << starpu_worker_get_count() << ")\n");
#ifdef STARPU_USE_CPU
FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max CPU " << starpu_cpu_worker_get_count() << ")\n");
#endif
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max OpenCL " << starpu_opencl_worker_get_count() << ")\n");
#endif
......@@ -172,11 +174,40 @@ public:
}
~FGroupTaskStarPUAlgorithm(){
starpu_resume();
cleanHandle();
delete[] handles_up;
delete[] handles_down;
starpu_resume();
starpu_pthread_mutex_t releaseMutex;
starpu_pthread_mutex_init(&releaseMutex, NULL);
#ifdef STARPU_USE_CPU
FStarPUUtils::ExecOnWorkers(STARPU_CPU, [&](){
starpu_pthread_mutex_lock(&releaseMutex);
cpuWrapper.releaseKernel(starpu_worker_get_id());
starpu_pthread_mutex_unlock(&releaseMutex);
});
wrappers.set(FSTARPU_CPU_IDX, &cpuWrapper);
#endif
#ifdef ScalFMM_ENABLE_CUDA_KERNEL
FStarPUUtils::ExecOnWorkers(STARPU_CUDA, [&](){
starpu_pthread_mutex_lock(&releaseMutex);
cudaWrapper.releaseKernel(starpu_worker_get_id());
starpu_pthread_mutex_unlock(&releaseMutex);
});
wrappers.set(FSTARPU_CUDA_IDX, &cudaWrapper);
#endif
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
FStarPUUtils::ExecOnWorkers(STARPU_OPENCL, [&](){
starpu_pthread_mutex_lock(&releaseMutex);
openclWrapper.releaseKernel(starpu_worker_get_id());
starpu_pthread_mutex_unlock(&releaseMutex);
});
wrappers.set(FSTARPU_OPENCL_IDX, &openclWrapper);
#endif
starpu_pthread_mutex_destroy(&releaseMutex);
starpu_shutdown();
}
......
......@@ -180,7 +180,9 @@ public:
initCodeletMpi();
FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max Worker " << starpu_worker_get_count() << ")\n");
#ifdef STARPU_USE_CPU
FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max CPU " << starpu_cpu_worker_get_count() << ")\n");
#endif
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max OpenCL " << starpu_opencl_worker_get_count() << ")\n");
#endif
......@@ -190,12 +192,41 @@ public:
}
~FGroupTaskStarPUMpiAlgorithm(){
starpu_resume();
cleanHandle();
cleanHandleMpi();
delete[] handles_up;
delete[] handles_down;
starpu_resume();
starpu_pthread_mutex_t releaseMutex;
starpu_pthread_mutex_init(&releaseMutex, NULL);
#ifdef STARPU_USE_CPU
FStarPUUtils::ExecOnWorkers(STARPU_CPU, [&](){
starpu_pthread_mutex_lock(&releaseMutex);
cpuWrapper.releaseKernel(starpu_worker_get_id());
starpu_pthread_mutex_unlock(&releaseMutex);
});
wrappers.set(FSTARPU_CPU_IDX, &cpuWrapper);
#endif
#ifdef ScalFMM_ENABLE_CUDA_KERNEL
FStarPUUtils::ExecOnWorkers(STARPU_CUDA, [&](){
starpu_pthread_mutex_lock(&releaseMutex);
cudaWrapper.releaseKernel(starpu_worker_get_id());
starpu_pthread_mutex_unlock(&releaseMutex);
});
wrappers.set(FSTARPU_CUDA_IDX, &cudaWrapper);
#endif
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
FStarPUUtils::ExecOnWorkers(STARPU_OPENCL, [&](){
starpu_pthread_mutex_lock(&releaseMutex);
openclWrapper.releaseKernel(starpu_worker_get_id());
starpu_pthread_mutex_unlock(&releaseMutex);
});
wrappers.set(FSTARPU_OPENCL_IDX, &openclWrapper);
#endif
starpu_pthread_mutex_destroy(&releaseMutex);
starpu_mpi_shutdown();
starpu_shutdown();
}
......
......@@ -63,9 +63,14 @@ public:
kernels[workerId] = new KernelClass(*originalKernel);
}
void releaseKernel(const int workerId){
delete kernels[workerId];
kernels[workerId] = nullptr;
}
~FStarPUCpuWrapper(){
for(int idxKernel = 0 ; idxKernel < STARPU_MAXCPUS ; ++idxKernel ){
delete kernels[idxKernel];
FAssertLF(kernels[idxKernel] == nullptr);
}
}
......
......@@ -60,11 +60,14 @@ public:
kernels[workerId] = FCuda__BuildCudaKernel<CudaKernelClass>(originalKernel);
}
/**
 * Hand the kernel stored for one worker to FCuda__ReleaseCudaKernel
 * and reset its slot to nullptr.
 * @param workerId index of the entry in the kernels array
 */
void releaseKernel(const int workerId){
    auto*& deviceKernel = kernels[workerId];
    FCuda__ReleaseCudaKernel(deviceKernel);
    deviceKernel = nullptr;
}
~FStarPUCudaWrapper(){
for(int idxKernel = 0 ; idxKernel < STARPU_MAXCUDADEVS ; ++idxKernel ){
if(kernels[idxKernel]){
FCuda__ReleaseCudaKernel(kernels[idxKernel]);
}
FAssertLF(kernels[idxKernel] == nullptr);
}
}
......
......@@ -58,9 +58,15 @@ public:
kernels[workerId]->initDeviceFromKernel(*originalKernel);
}
void releaseKernel(const int workerId){
kernels[workerId]->releaseKernel();
delete kernels[workerId];
kernels[workerId] = nullptr;
}
~FStarPUOpenClWrapper(){
for(int idxKernel = 0 ; idxKernel < STARPU_MAXOPENCLDEVS ; ++idxKernel ){
delete kernels[idxKernel];
FAssertLF(kernels[idxKernel] == nullptr);
}
}
......@@ -73,6 +79,7 @@ public:
FStarPUPtrInterface* worker = nullptr;
starpu_codelet_unpack_args(cl_arg, &worker);
OpenCLKernelClass* kernel = worker->get<ThisClass>(FSTARPU_OPENCL_IDX)->kernels[starpu_worker_get_id()];
kernel->bottomPassPerform(leafCellsPtr, leafCellsSize, containersPtr, containersSize);
}
......
......@@ -112,17 +112,42 @@ public:
}
virtual void releaseKernel(){
int err;
err = starpu_opencl_release_kernel(kernel_bottomPassPerform);
if(err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
err = starpu_opencl_release_kernel(kernel_upwardPassPerform);
if(err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
err = starpu_opencl_release_kernel(kernel_transferInoutPassPerformMpi);
if(err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
err = starpu_opencl_release_kernel(kernel_transferInPassPerform);
if(err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
err = starpu_opencl_release_kernel(kernel_transferInoutPassPerform);
if(err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
err = starpu_opencl_release_kernel(kernel_downardPassPerform);
if(err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
err = starpu_opencl_release_kernel(kernel_directInoutPassPerformMpi);
if(err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
err = starpu_opencl_release_kernel(kernel_directInoutPassPerform);
if(err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
err = starpu_opencl_release_kernel(kernel_directInPassPerform);
if(err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
err = starpu_opencl_release_kernel(kernel_mergePassPerform);
if(err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
err = starpu_opencl_unload_opencl(&opencl_code);
if(err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
}
virtual ~FOpenCLDeviceWrapper(){
// Release
releaseKernel();
KernelFilenameClass kernelFilename;
const char* filename = kernelFilename;
if(filename){
const int err = starpu_opencl_unload_opencl(&opencl_code);
if(err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
}
}
cl_context& getOpenCLContext(){
......@@ -130,14 +155,39 @@ public:
}
void bottomPassPerform(cl_mem leafCellsPtr, size_t leafCellsSize, cl_mem containersPtr, size_t containersSize){
SetKernelArgs(kernel_bottomPassPerform, 0, &leafCellsPtr, &leafCellsSize, &containersPtr, &containersSize, &user_data);
/*cl_int errcode_ret;
const int size = sizeof(FTestCell);
int* output = new int[size];
cl_mem outputcl = clCreateBuffer(getOpenCLContext(),
CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR,
size*sizeof(int),
(void*)output, &errcode_ret);
FAssertLF(outputcl && errcode_ret == CL_SUCCESS, "OpenCL error code " , errcode_ret);*/
SetKernelArgs(kernel_bottomPassPerform, 0, &leafCellsPtr, &leafCellsSize, &containersPtr, &containersSize, &user_data/*, &outputcl*/);
size_t dim = 1;
const int err = clEnqueueNDRangeKernel(queue_bottomPassPerform, kernel_bottomPassPerform, 1, NULL, &dim, NULL, 0, NULL, NULL);
if (err != CL_SUCCESS) STARPU_OPENCL_REPORT_ERROR(err);
/*errcode_ret = clEnqueueReadBuffer(queue_bottomPassPerform, outputcl,
CL_TRUE, // blocking read
0, // write from the start
sizeof(int) * size,
output, 0, NULL, NULL);
FAssertLF(errcode_ret == CL_SUCCESS, "OpenCL error code " , errcode_ret);
for(int idx = 0 ; idx < 10 ; ++idx){
std::cout << "value " << idx << " = " << output[idx] << "\n";
}
// FTestCell* cell = (FTestCell*)output;
// std::cout << " cell->getDataUp() " << cell->getDataUp() << "\n";
clReleaseMemObject(outputcl);
delete output;*/
}
void upwardPassPerform(cl_mem currentCellsPtr, size_t currentCellsSize, cl_mem subCellGroupsPtr[9], size_t subCellGroupsSize[9], int nbSubCellGroups, int idxLevel){
return; // TODO
Uptr9 ptrs;
memcpy(ptrs.ptrs, subCellGroupsPtr, sizeof(cl_mem)*9);
size_t9 sizes;
......@@ -152,6 +202,7 @@ public:
void transferInoutPassPerformMpi(cl_mem currentCellsPtr,
size_t currentCellsSize, cl_mem externalCellsPtr, size_t externalCellsSize, int idxLevel, cl_mem outsideInteractionsCl,
size_t outsideInteractionsSize){
return; // TODO
SetKernelArgs(kernel_transferInoutPassPerformMpi, 0, &currentCellsPtr,&currentCellsSize, &externalCellsPtr, &externalCellsSize, &idxLevel, &outsideInteractionsCl,
&outsideInteractionsSize, &user_data);
size_t dim = 1;
......@@ -169,6 +220,7 @@ public:
void transferInoutPassPerform(cl_mem currentCellsPtr,
size_t currentCellsSize, cl_mem externalCellsPtr, size_t externalCellsSize, int idxLevel, cl_mem outsideInteractionsCl,
size_t outsideInteractionsSize){
return; // TODO
SetKernelArgs(kernel_transferInoutPassPerform, 0, &currentCellsPtr,&currentCellsSize, &externalCellsPtr, &externalCellsSize, &idxLevel,
&outsideInteractionsCl,&outsideInteractionsSize, &user_data);
size_t dim = 1;
......@@ -178,6 +230,7 @@ public:
void downardPassPerform(cl_mem currentCellsPtr,
size_t currentCellsSize, cl_mem subCellGroupsPtr[9], size_t subCellGroupsSize[9], int nbSubCellGroups, int idxLevel){
return; // TODO
Uptr9 ptrs;
memcpy(ptrs.ptrs, subCellGroupsPtr, sizeof(cl_mem)*9);
size_t9 sizes;
......@@ -193,6 +246,7 @@ public:
void directInoutPassPerformMpi(cl_mem containersPtr,
size_t containersSize, cl_mem externalContainersPtr, size_t externalContainersSize, cl_mem outsideInteractionsCl,
size_t outsideInteractionsSize){
return; // TODO
SetKernelArgs(kernel_directInoutPassPerformMpi, 0, &containersPtr,
&containersSize, &externalContainersPtr, &externalContainersSize, &outsideInteractionsCl,&outsideInteractionsSize, &treeHeight, &user_data);
size_t dim = 1;
......@@ -201,6 +255,7 @@ public:
}
void directInPassPerform(cl_mem containersPtr, size_t containerSize){
return; // TODO
SetKernelArgs(kernel_directInPassPerform, 0, &containersPtr, &containerSize, &treeHeight, &user_data);
size_t dim = 1;
const int err = clEnqueueNDRangeKernel(queue_directInPassPerform, kernel_directInPassPerform, 1, NULL, &dim, NULL, 0, NULL, NULL);
......@@ -210,6 +265,7 @@ public:
void directInoutPassPerform(cl_mem containersPtr,
size_t containerSize, cl_mem externalContainersPtr, size_t externalContainersSize, cl_mem outsideInteractionsCl,
size_t outsideInteractionsSize){
return; // TODO
SetKernelArgs(kernel_directInoutPassPerform, 0, &containersPtr,
&containerSize, &externalContainersPtr, &externalContainersSize, &outsideInteractionsCl, &outsideInteractionsSize, &treeHeight, &user_data);
size_t dim = 1;
......@@ -219,6 +275,7 @@ public:
void mergePassPerform(cl_mem leafCellsPtr,
size_t leafCellsSize, cl_mem containersPtr, size_t containersSize){
return; // TODO
SetKernelArgs(kernel_mergePassPerform, 0, &leafCellsPtr, &leafCellsSize, &containersPtr, &containersSize, &user_data);
size_t dim = 1;
const int err = clEnqueueNDRangeKernel(queue_mergePassPerform, kernel_mergePassPerform, 1, NULL, &dim, NULL, 0, NULL, NULL);
......
......@@ -8,6 +8,8 @@ typedef long long int MortonIndex;
#define FCellClassSize ___FCellClassSize___
#define FCellUpOffset ___FCellUpOffset___
#define FCellDownOffset ___FCellDownOffset___
#define FCellMortonOffset ___FCellMortonOffset___
#define FCellCoordinateOffset ___FCellCoordinateOffset___
#define FOpenCLGroupOfCellsCellIsEmptyFlag ((MortonIndex)-1)
#define NbAttributesPerParticle ___NbAttributesPerParticle___
......@@ -216,10 +218,12 @@ struct FOpenCLGroupAttachedLeaf EmptyFOpenCLGroupAttachedLeaf(){
return leaf;
}
bool FOpenCLGroupAttachedLeaf_isAttachedToSomething(const struct FOpenCLGroupAttachedLeaf* group){
return (group->nbParticles != -1);
}
/**
 * Return the nbParticles field of the attached leaf.
 * The return type is int: a bool return would collapse every non-zero
 * count to 1 and lose the actual number of particles.
 */
int FOpenCLGroupAttachedLeaf_getNbParticles(const struct FOpenCLGroupAttachedLeaf* group){
    return (group->nbParticles);
}
/** One header is allocated at the beginning of each block */
......@@ -237,12 +241,13 @@ struct FOpenCLGroupOfParticlesBlockHeader{
size_t attributeOffset;
//< The total number of particles in the group
int nbParticlesInGroup;
};
}__attribute__ ((aligned (1)));
/** Information about a leaf */
struct FOpenCLGroupOfParticlesLeafHeader {
int nbParticles;
size_t offSet;
};
}__attribute__ ((aligned (1)));
struct FOpenCLGroupOfParticles {
......@@ -274,8 +279,8 @@ struct FOpenCLGroupOfParticles BuildFOpenCLGroupOfParticles(__global unsigned ch
// Move the pointers to the correct position
group.blockHeader = ((__global struct FOpenCLGroupOfParticlesBlockHeader*)group.memoryBuffer);
group.blockIndexesTable = ((__global int*)group.memoryBuffer+sizeof(struct FOpenCLGroupOfParticlesBlockHeader));
group.leafHeader = ((__global struct FOpenCLGroupOfParticlesLeafHeader*)group.memoryBuffer+sizeof(struct FOpenCLGroupOfParticlesBlockHeader)+(group.blockHeader->blockIndexesTableSize*sizeof(int)));
group.blockIndexesTable = ((__global int*)(group.memoryBuffer+sizeof(struct FOpenCLGroupOfParticlesBlockHeader)));
group.leafHeader = ((__global struct FOpenCLGroupOfParticlesLeafHeader*)(group.memoryBuffer+sizeof(struct FOpenCLGroupOfParticlesBlockHeader)+(group.blockHeader->blockIndexesTableSize*sizeof(int))));
// Init particle pointers
group.blockHeader->positionOffset = (sizeof(FReal) * group.blockHeader->nbParticlesAllocatedInGroup);
......@@ -286,7 +291,7 @@ struct FOpenCLGroupOfParticles BuildFOpenCLGroupOfParticles(__global unsigned ch
// Redirect pointer to data
group.blockHeader->attributeOffset = (sizeof(FParticleValueClass) * group.blockHeader->nbParticlesAllocatedInGroup);
__global unsigned char* previousPointer = ((__global unsigned char*)group.particlePosition[2] + group.blockHeader->nbParticlesAllocatedInGroup);
__global unsigned char* previousPointer = ((__global unsigned char*)(group.particlePosition[2] + group.blockHeader->nbParticlesAllocatedInGroup));
for(unsigned idxAttribute = 0 ; idxAttribute < NbAttributesPerParticle ; ++idxAttribute){
group.particleAttributes[idxAttribute] = ((__global FParticleValueClass*)previousPointer);
previousPointer += sizeof(FParticleValueClass)*group.blockHeader->nbParticlesAllocatedInGroup;
......@@ -299,6 +304,9 @@ MortonIndex FOpenCLGroupOfParticles_getStartingIndex(const struct FOpenCLGroupOf
/** Return the endingIndex value stored in the group's block header. */
MortonIndex FOpenCLGroupOfParticles_getEndingIndex(const struct FOpenCLGroupOfParticles* group) {
    const MortonIndex lastIndex = group->blockHeader->endingIndex;
    return lastIndex;
}
/** Return the numberOfLeavesInBlock value stored in the group's block header. */
int FOpenCLGroupOfParticles_getNumberOfLeaves(const struct FOpenCLGroupOfParticles* group) {
    const int leafCount = group->blockHeader->numberOfLeavesInBlock;
    return leafCount;
}
bool FOpenCLGroupOfParticles_isInside(const struct FOpenCLGroupOfParticles* group, const MortonIndex inIndex) {
return group->blockHeader->startingIndex <= inIndex && inIndex < group->blockHeader->endingIndex;
}
......@@ -323,7 +331,7 @@ struct FOpenCLGroupOfCellsBlockHeader{
MortonIndex endingIndex;
int numberOfCellsInBlock;
int blockIndexesTableSize;
};
} __attribute__ ((aligned (1)));
struct FOpenCLGroupOfCells {
......@@ -420,13 +428,18 @@ void P2PRemote(const int3 pos,
}
MortonIndex getMortonIndex(__global const unsigned char* cell, __global void* user_data) {
return 0;
__global MortonIndex* mindex = (__global MortonIndex*)(cell+FCellMortonOffset);
return (*mindex);
}
int3 getCoordinate(__global const unsigned char* cell, __global void* user_data) {
__global int* cellcoord = (__global int*)(cell+FCellCoordinateOffset);
int3 coord;
coord.x = coord.y = coord.z = 0;
coord.x = cellcoord[0];
coord.y = cellcoord[1];
coord.z = cellcoord[2];
return coord;
}
......@@ -449,13 +462,13 @@ int3 getCoordinate(__global const unsigned char* cell, __global void* user_data)
__kernel void FOpenCL__bottomPassPerform(__global unsigned char* leafCellsPtr, size_t leafCellsSize,
__global unsigned char* containersPtr, size_t containersSize,
__global void* userkernel ){
__global void* userkernel/*, __global int* output */){
struct FOpenCLGroupOfCells leafCells = BuildFOpenCLGroupOfCells(leafCellsPtr, leafCellsSize);
struct FOpenCLGroupOfParticles containers = BuildFOpenCLGroupOfParticles(containersPtr, containersSize);
const MortonIndex blockStartIdx = FOpenCLGroupOfCells_getStartingIndex(&leafCells);
const MortonIndex blockEndIdx = FOpenCLGroupOfCells_getEndingIndex(&leafCells);
for(MortonIndex mindex = blockStartIdx ; mindex < blockEndIdx ; ++mindex){
__global unsigned char* cell = FOpenCLGroupOfCells_getCell(&leafCells, mindex);
if(cell){
......@@ -463,6 +476,25 @@ __kernel void FOpenCL__bottomPassPerform(__global unsigned char* leafCellsPtr, s
struct FOpenCLGroupAttachedLeaf particles = FOpenCLGroupOfParticles_getLeaf(&containers, mindex);
FOpenCLAssertLF(FOpenCLGroupAttachedLeaf_isAttachedToSomething(&particles));
P2M(cell, particles, userkernel);
/*output[0] = blockStartIdx;
output[1] = blockEndIdx;
output[2] = particles.nbParticles;
output[3] = getMortonIndex(cell, userkernel);
output[4] = mindex;
output[5] = FOpenCLGroupOfCells_exists(&leafCells, mindex);
output[6] = FOpenCLGroupOfParticles_getStartingIndex(&containers);
output[7] = FOpenCLGroupOfParticles_getEndingIndex(&containers);
output[8] = FOpenCLGroupOfParticles_getNumberOfLeaves(&containers);
int count = 0;
for(int idx = FOpenCLGroupOfParticles_getStartingIndex(&containers) ; idx < FOpenCLGroupOfParticles_getEndingIndex(&containers) ; ++idx){
if((&containers)->blockIndexesTable[idx - (&containers)->blockHeader->startingIndex] != -1) count++;
}
output[8] = FOpenCLGroupAttachedLeaf_isAttachedToSomething(&particles);
output[9] = FOpenCLGroupAttachedLeaf_getNbParticles(&particles);//sizeof(struct FOpenCLGroupOfParticlesBlockHeader);//count;
//return;//TODO
//__global long long* up = (__global long long*)(((unsigned char*)output)+FCellUpOffset);
//(*up) = particles.nbParticles;
return;*/
}
}
}
......
......@@ -42,13 +42,21 @@
/**
 * Byte offsets of the FTestCell members dataUp, dataDown, mortonIndex and
 * coordinate, exposed as static constants.
 * The values are provided by the definitions that follow the struct.
 */
struct FTestCell_Alignement{
static const int dataUp;
static const int dataDown;
static const int mindex;
static const int coord;
};
// Each offset is computed by pretending an FTestCell lives at address 0xF00,
// taking the address of the member inside it, and subtracting 0xF00 again.
// The std::size_t difference is then stored into an int.
const int FTestCell_Alignement::dataUp = reinterpret_cast<std::size_t>(&((reinterpret_cast<FTestCell*>(0xF00))->dataUp)) - std::size_t(0xF00);
const int FTestCell_Alignement::dataDown = reinterpret_cast<std::size_t>(&((reinterpret_cast<FTestCell*>(0xF00))->dataDown)) - std::size_t(0xF00);
const int FTestCell_Alignement::mindex = reinterpret_cast<std::size_t>(&((reinterpret_cast<FTestCell*>(0xF00))->mortonIndex)) - std::size_t(0xF00);
const int FTestCell_Alignement::coord = reinterpret_cast<std::size_t>(&((reinterpret_cast<FTestCell*>(0xF00))->coordinate)) - std::size_t(0xF00);
int main(int argc, char* argv[]){
setenv("STARPU_NCPU","0",1);
setenv("STARPU_NOPENCL","1",1);
setenv("STARPU_OPENCL_ONLY_ON_CPUS","1",1);
const FParameterNames LocalOptionBlocSize {
{"-bs"},
"The size of the block of the blocked tree"
......@@ -70,6 +78,8 @@ int main(int argc, char* argv[]){
kernelfile.replaceAll("___NbAttributesPerParticle___", 2);
kernelfile.replaceAll("___FCellUpOffset___", FTestCell_Alignement::dataUp);
kernelfile.replaceAll("___FCellDownOffset___", FTestCell_Alignement::dataDown);
kernelfile.replaceAll("___FCellMortonOffset___", FTestCell_Alignement::mindex);
kernelfile.replaceAll("___FCellCoordinateOffset___", FTestCell_Alignement::coord);
}
operator const char*(){
......@@ -150,7 +160,7 @@ int main(int argc, char* argv[]){
// Run the algorithm
GroupKernelClass groupkernel;
GroupAlgorithm groupalgo(&groupedTree,&groupkernel);
groupalgo.execute();
groupalgo.execute(FFmmP2M); // TODO
// Usual algorithm
KernelClass kernels; // FTestKernels FBasicKernels
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment