Commit 1f77bf7c authored by BRAMAS Berenger's avatar BRAMAS Berenger
Browse files

Update cuda starpu to use a real class POD

parent df70cc33
This diff is collapsed.
...@@ -6,54 +6,54 @@ ...@@ -6,54 +6,54 @@
#include "../../Utils/FGlobal.hpp" #include "../../Utils/FGlobal.hpp"
#include "../FOutOfBlockInteraction.hpp" #include "../FOutOfBlockInteraction.hpp"
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass> template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__bottomPassCallback(unsigned char* leafCellsPtr, std::size_t leafCellsSize, void FCuda__bottomPassCallback(unsigned char* leafCellsPtr, std::size_t leafCellsSize,
unsigned char* containersPtr, std::size_t containersSize, unsigned char* containersPtr, std::size_t containersSize,
CudaKernelClass* kernel, cudaStream_t currentStream); CudaKernelClass* kernel, cudaStream_t currentStream);
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass> template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__upwardPassCallback(unsigned char* currentCellsPtr, std::size_t currentCellsSize, void FCuda__upwardPassCallback(unsigned char* currentCellsPtr, std::size_t currentCellsSize,
unsigned char* subCellGroupsPtr[9], std::size_t subCellGroupsSize[9], unsigned char* subCellGroupsPtr[9], std::size_t subCellGroupsSize[9],
int nbSubCellGroups, int idxLevel, CudaKernelClass* kernel, cudaStream_t currentStream); int nbSubCellGroups, int idxLevel, CudaKernelClass* kernel, cudaStream_t currentStream);
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass> template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__transferInoutPassCallbackMpi(unsigned char* currentCellsPtr, std::size_t currentCellsSize, void FCuda__transferInoutPassCallbackMpi(unsigned char* currentCellsPtr, std::size_t currentCellsSize,
unsigned char* externalCellsPtr, std::size_t externalCellsSize, unsigned char* externalCellsPtr, std::size_t externalCellsSize,
int idxLevel, const OutOfBlockInteraction* outsideInteractions, int idxLevel, const OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions, CudaKernelClass* kernel, cudaStream_t currentStream); int nbOutsideInteractions, CudaKernelClass* kernel, cudaStream_t currentStream);
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass> template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__transferInPassCallback(unsigned char* currentCellsPtr, std::size_t currentCellsSize, void FCuda__transferInPassCallback(unsigned char* currentCellsPtr, std::size_t currentCellsSize,
int idxLevel, CudaKernelClass* kernel, cudaStream_t currentStream); int idxLevel, CudaKernelClass* kernel, cudaStream_t currentStream);
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass> template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__transferInoutPassCallback(unsigned char* currentCellsPtr, std::size_t currentCellsSize, void FCuda__transferInoutPassCallback(unsigned char* currentCellsPtr, std::size_t currentCellsSize,
unsigned char* externalCellsPtr, std::size_t externalCellsSize, unsigned char* externalCellsPtr, std::size_t externalCellsSize,
int idxLevel, const OutOfBlockInteraction* outsideInteractions, int idxLevel, const OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions, CudaKernelClass* kernel, cudaStream_t currentStream); int nbOutsideInteractions, CudaKernelClass* kernel, cudaStream_t currentStream);
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass> template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__downardPassCallback(unsigned char* currentCellsPtr, std::size_t currentCellsSize, void FCuda__downardPassCallback(unsigned char* currentCellsPtr, std::size_t currentCellsSize,
unsigned char* subCellGroupsPtr[9], std::size_t subCellGroupsSize[9], unsigned char* subCellGroupsPtr[9], std::size_t subCellGroupsSize[9],
int nbSubCellGroups, int idxLevel, CudaKernelClass* kernel, cudaStream_t currentStream); int nbSubCellGroups, int idxLevel, CudaKernelClass* kernel, cudaStream_t currentStream);
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass> template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__directInoutPassCallbackMpi(unsigned char* containersPtr, std::size_t containersSize, void FCuda__directInoutPassCallbackMpi(unsigned char* containersPtr, std::size_t containersSize,
unsigned char* externalContainersPtr, std::size_t externalContainersSize, unsigned char* externalContainersPtr, std::size_t externalContainersSize,
const OutOfBlockInteraction* outsideInteractions, const OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions, const int treeHeight, CudaKernelClass* kernel, cudaStream_t currentStream); int nbOutsideInteractions, const int treeHeight, CudaKernelClass* kernel, cudaStream_t currentStream);
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass> template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__directInPassCallback(unsigned char* containersPtr, std::size_t containersSize, void FCuda__directInPassCallback(unsigned char* containersPtr, std::size_t containersSize,
const int treeHeight, CudaKernelClass* kernel, cudaStream_t currentStream); const int treeHeight, CudaKernelClass* kernel, cudaStream_t currentStream);
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass> template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__directInoutPassCallback(unsigned char* containersPtr, std::size_t containersSize, void FCuda__directInoutPassCallback(unsigned char* containersPtr, std::size_t containersSize,
unsigned char* externalContainersPtr, std::size_t externalContainersSize, unsigned char* externalContainersPtr, std::size_t externalContainersSize,
const OutOfBlockInteraction* outsideInteractions, const OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions, const int treeHeight, CudaKernelClass* kernel, cudaStream_t currentStream); int nbOutsideInteractions, const int treeHeight, CudaKernelClass* kernel, cudaStream_t currentStream);
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass> template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__mergePassCallback(unsigned char* leafCellsPtr, std::size_t leafCellsSize, void FCuda__mergePassCallback(unsigned char* leafCellsPtr, std::size_t leafCellsSize,
unsigned char* containersPtr, std::size_t containersSize, unsigned char* containersPtr, std::size_t containersSize,
CudaKernelClass* kernel, cudaStream_t currentStream); CudaKernelClass* kernel, cudaStream_t currentStream);
......
#ifndef FCUDAEMPTYCELL_HPP
#define FCUDAEMPTYCELL_HPP
#include "../../Utils/FGlobal.hpp"
#include "../../Containers/FTreeCoordinate.hpp"
#include "../FStarPUDefaultAlign.hpp"
struct alignas(FStarPUDefaultAlign::StructAlign) FCudaEmptyCell {
MortonIndex mortonIndex;
int coordinates[3];
};
#endif // FCUDAEMPTYCELL_HPP
...@@ -4,28 +4,29 @@ ...@@ -4,28 +4,29 @@
#include "FCudaGlobal.hpp" #include "FCudaGlobal.hpp"
#include "FCudaGroupAttachedLeaf.hpp" #include "FCudaGroupAttachedLeaf.hpp"
#include "FCudaEmptyCell.hpp"
/** /**
* This class defines what should be a Cuda kernel. * This class defines what should be a Cuda kernel.
*/ */
template <class ContainerClass = FCudaGroupAttachedLeaf<0, int>> template <class CellClass = FCudaEmptyCell, class ContainerClass = FCudaGroupAttachedLeaf<0, int>>
class FCudaEmptyKernel { class FCudaEmptyKernel {
protected: protected:
public: public:
__device__ void P2M(unsigned char* const /*pole*/, const ContainerClass* const /*particles*/) { __device__ void P2M(CellClass* /*pole*/, const ContainerClass* const /*particles*/) {
} }
__device__ void M2M(unsigned char* const /*pole*/, const unsigned char *const *const /*child*/, const int /*level*/) { __device__ void M2M(CellClass* /*pole*/, const CellClass* /*child*/[8], const int /*level*/) {
} }
__device__ void M2L(unsigned char* const /*pole*/, const unsigned char* /*distantNeighbors*/[343], __device__ void M2L(CellClass* /*pole*/, const CellClass* /*distantNeighbors*/[343],
const int /*size*/, const int /*level*/) { const int /*size*/, const int /*level*/) {
} }
__device__ void L2L(const unsigned char*const /*local*/, unsigned char* *const /*child*/, const int /*level*/) { __device__ void L2L(const CellClass* /*local*/, CellClass* /*child*/[8], const int /*level*/) {
} }
__device__ void L2P(const unsigned char* const /*local*/, ContainerClass*const /*particles*/){ __device__ void L2P(const CellClass* /*local*/, ContainerClass*const /*particles*/){
} }
__device__ void P2P(const int3& , __device__ void P2P(const int3& ,
...@@ -38,16 +39,6 @@ public: ...@@ -38,16 +39,6 @@ public:
ContainerClass* const /*directNeighborsParticles*/[27], const int ){ ContainerClass* const /*directNeighborsParticles*/[27], const int ){
} }
__device__ MortonIndex getMortonIndex(const unsigned char* /*cell*/) const{
return 0;
}
__device__ int3 getCoordinate(const unsigned char* /*cell*/) const{
int3 coord;
coord.x = coord.y = coord.z = 0;
return coord;
}
__host__ static FCudaEmptyKernel* InitKernelKernel(void*){ __host__ static FCudaEmptyKernel* InitKernelKernel(void*){
return nullptr; return nullptr;
} }
......
...@@ -9,7 +9,7 @@ ...@@ -9,7 +9,7 @@
/** /**
* @brief The FCudaGroupOfCells class manages the cells in block allocation. * @brief The FCudaGroupOfCells class manages the cells in block allocation.
*/ */
template <const size_t CellClassSize> template <class CellClass>
class FCudaGroupOfCells { class FCudaGroupOfCells {
/** One header is allocated at the beginning of each block */ /** One header is allocated at the beginning of each block */
struct alignas(FStarPUDefaultAlign::StructAlign) BlockHeader{ struct alignas(FStarPUDefaultAlign::StructAlign) BlockHeader{
...@@ -108,14 +108,14 @@ public: ...@@ -108,14 +108,14 @@ public:
} }
/** Return the address of the cell if it exists (or NULL) */ /** Return the address of the cell if it exists (or NULL) */
__device__ unsigned char* getCell(const MortonIndex inIndex){ __device__ CellClass* getCell(const MortonIndex inIndex){
if( exists(inIndex) ) return &blockCells[CellClassSize*blockIndexesTable[inIndex-blockHeader->startingIndex]]; if( exists(inIndex) ) return (CellClass*)(&blockCells[sizeof(CellClass)*blockIndexesTable[inIndex-blockHeader->startingIndex]]);
else return nullptr; else return nullptr;
} }
/** Return the address of the cell if it exists (or NULL) */ /** Return the address of the cell if it exists (or NULL) */
__device__ const unsigned char* getCell(const MortonIndex inIndex) const { __device__ const CellClass* getCell(const MortonIndex inIndex) const {
if( exists(inIndex) ) return &blockCells[CellClassSize*blockIndexesTable[inIndex-blockHeader->startingIndex]]; if( exists(inIndex) ) return (CellClass*)(&blockCells[sizeof(CellClass)*blockIndexesTable[inIndex-blockHeader->startingIndex]]);
else return nullptr; else return nullptr;
} }
}; };
......
...@@ -7,126 +7,61 @@ ...@@ -7,126 +7,61 @@
// We need to describe this cell // We need to describe this cell
#include "../../Components/FTestCell.hpp" #include "../../Components/FTestCell.hpp"
class FTestCellCudaDescriptor {
FTestCell* ptr;
public:
__device__ FTestCellCudaDescriptor(unsigned char* inPtr)
: ptr(reinterpret_cast<FTestCell*>(inPtr)){
}
__device__ long long int& dataUp(){
return ptr->dataUp;
}
__device__ long long int& dataDown(){
return ptr->dataDown;
}
__device__ MortonIndex getMortonIndex() const{
return ptr->mortonIndex;
}
__device__ int3 getCoordinate() const{
const int* coordinate = (const int*)&ptr->coordinate;
int3 coord;
coord.x = coordinate[0];
coord.y = coordinate[1];
coord.z = coordinate[2];
return coord;
}
};
class FTestCellCudaConstDescriptor {
const FTestCell* ptr;
public:
__device__ FTestCellCudaConstDescriptor(const unsigned char* inPtr)
: ptr(reinterpret_cast<const FTestCell*>(inPtr)){
}
__device__ const long long int& dataUp()const { template< class CellClass, class ContainerClass >
return ptr->dataUp;
}
__device__ const long long int& dataDown()const {
return ptr->dataDown;
}
__device__ MortonIndex getMortonIndex() const{
return ptr->mortonIndex;
}
__device__ int3 getCoordinate() const{
const int* coordinate = (const int*)&ptr->coordinate;
int3 coord;
coord.x = coordinate[0];
coord.y = coordinate[1];
coord.z = coordinate[2];
return coord;
}
};
template< class ContainerClass >
class FTestCudaKernels { class FTestCudaKernels {
public: public:
/** Before upward */ /** Before upward */
__device__ void P2M(unsigned char* const pole, const ContainerClass* const particles) { __device__ void P2M(CellClass* pole, const ContainerClass* const particles) {
// the pole represents all particles under // the pole represents all particles under
if(threadIdx.x == 0){ if(threadIdx.x == 0){
FTestCellCudaDescriptor cell(pole); pole->dataUp += particles->getNbParticles();
cell.dataUp() += particles->getNbParticles();
} }
} }
/** During upward */ /** During upward */
__device__ void M2M(unsigned char* const pole, const unsigned char*const*const child, const int /*level*/) { __device__ void M2M(CellClass* pole, const CellClass* child[8], const int /*level*/) {
if(threadIdx.x == 0) { if(threadIdx.x == 0) {
FTestCellCudaDescriptor cell(pole);
// A parent represents the sum of the child // A parent represents the sum of the child
for(int idx = 0 ; idx < 8 ; ++idx){ for(int idx = 0 ; idx < 8 ; ++idx){
if(child[idx]){ if(child[idx]){
FTestCellCudaConstDescriptor childCell(child[idx]); pole->dataUp += child[idx]->dataUp;
cell.dataUp() += childCell.dataUp();
} }
} }
} }
} }
/** Before Downward */ /** Before Downward */
__device__ void M2L(unsigned char* const local, const unsigned char* distantNeighbors[343], const int /*size*/, const int /*level*/) { __device__ void M2L(CellClass* local, const CellClass* distantNeighbors[343], const int /*size*/, const int /*level*/) {
if(threadIdx.x == 0) { if(threadIdx.x == 0) {
FTestCellCudaDescriptor cell(local);
// The pole is impacted by what represent other poles // The pole is impacted by what represent other poles
for(int idx = 0 ; idx < 343 ; ++idx){ for(int idx = 0 ; idx < 343 ; ++idx){
if(distantNeighbors[idx]){ if(distantNeighbors[idx]){
FTestCellCudaConstDescriptor interCell(distantNeighbors[idx]); local->dataDown += distantNeighbors[idx]->dataUp;
cell.dataDown() += interCell.dataUp();
} }
} }
} }
} }
/** During Downward */ /** During Downward */
__device__ void L2L(const unsigned char*const local, unsigned char**const child, const int /*level*/) { __device__ void L2L(const CellClass* local, CellClass* child[8], const int /*level*/) {
if(threadIdx.x == 0) { if(threadIdx.x == 0) {
FTestCellCudaConstDescriptor cell(local);
// Each child is impacted by the father // Each child is impacted by the father
for(int idx = 0 ; idx < 8 ; ++idx){ for(int idx = 0 ; idx < 8 ; ++idx){
if(child[idx]){ if(child[idx]){
FTestCellCudaDescriptor cellChild(child[idx]); child[idx]->dataDown += local->dataDown;
cellChild.dataDown() += cell.dataDown();
} }
} }
} }
} }
/** After Downward */ /** After Downward */
__device__ void L2P(const unsigned char* const local, ContainerClass*const particles){ __device__ void L2P(const CellClass* local, ContainerClass*const particles){
if(threadIdx.x == 0) { if(threadIdx.x == 0) {
FTestCellCudaConstDescriptor cell(local);
// The particles is impacted by the parent cell // The particles is impacted by the parent cell
long long int*const particlesAttributes = particles->template getAttribute<0>(); long long int*const particlesAttributes = particles->template getAttribute<0>();
for(int idxPart = 0 ; idxPart < particles->getNbParticles() ; ++idxPart){ for(int idxPart = 0 ; idxPart < particles->getNbParticles() ; ++idxPart){
particlesAttributes[idxPart] += cell.dataDown(); particlesAttributes[idxPart] += local->dataDown;
} }
} }
} }
...@@ -175,16 +110,6 @@ public: ...@@ -175,16 +110,6 @@ public:
} }
} }
__device__ MortonIndex getMortonIndex(const unsigned char* cell) const{
FTestCellCudaConstDescriptor cellAccess(cell);
return cellAccess.getMortonIndex();
}
__device__ int3 getCoordinate(const unsigned char* cell) const{
FTestCellCudaConstDescriptor cellAccess(cell);
return cellAccess.getCoordinate();
}
__host__ static FTestCudaKernels* InitKernelKernel(void*){ __host__ static FTestCudaKernels* InitKernelKernel(void*){
return nullptr; return nullptr;
} }
......
...@@ -7,6 +7,14 @@ ...@@ -7,6 +7,14 @@
class FCudaTreeCoordinate { class FCudaTreeCoordinate {
public: public:
__device__ static int3 ConvertCoordinate(const int coordinate[3]) {
int3 coord;
coord.x = coordinate[0];
coord.y = coordinate[1];
coord.z = coordinate[2];
return coord;
}
__device__ static int3 GetPositionFromMorton(MortonIndex inIndex, const int inLevel){ __device__ static int3 GetPositionFromMorton(MortonIndex inIndex, const int inLevel){
MortonIndex mask = 0x1LL; MortonIndex mask = 0x1LL;
......
...@@ -30,6 +30,7 @@ ...@@ -30,6 +30,7 @@
#include "Cuda/FCudaGroupAttachedLeaf.hpp" #include "Cuda/FCudaGroupAttachedLeaf.hpp"
#include "Cuda/FCudaGroupOfParticles.hpp" #include "Cuda/FCudaGroupOfParticles.hpp"
#include "Cuda/FCudaGroupOfCells.hpp" #include "Cuda/FCudaGroupOfCells.hpp"
#include "Cuda/FCudaEmptyCell.hpp"
#endif #endif
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL #ifdef ScalFMM_ENABLE_OPENCL_KERNEL
#include "FStarPUOpenClWrapper.hpp" #include "FStarPUOpenClWrapper.hpp"
...@@ -39,7 +40,7 @@ ...@@ -39,7 +40,7 @@
template <class OctreeClass, class CellContainerClass, class CellClass, class KernelClass, class ParticleGroupClass, class ParticleContainerClass template <class OctreeClass, class CellContainerClass, class CellClass, class KernelClass, class ParticleGroupClass, class ParticleContainerClass
#ifdef ScalFMM_ENABLE_CUDA_KERNEL #ifdef ScalFMM_ENABLE_CUDA_KERNEL
, class CudaCellContainerClass = FCudaGroupOfCells<0>, class CudaParticleGroupClass = FCudaGroupOfParticles<0, int>, class CudaParticleContainerClass = FCudaGroupAttachedLeaf<0, int>, , class CudaCellClass = FCudaEmptyCell, class CudaCellContainerClass = FCudaGroupOfCells<FCudaEmptyCell>, class CudaParticleGroupClass = FCudaGroupOfParticles<0, int>, class CudaParticleContainerClass = FCudaGroupAttachedLeaf<0, int>,
class CudaKernelClass = FCudaEmptyKernel<> class CudaKernelClass = FCudaEmptyKernel<>
#endif #endif
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL #ifdef ScalFMM_ENABLE_OPENCL_KERNEL
...@@ -50,7 +51,7 @@ class FGroupTaskStarPUAlgorithm { ...@@ -50,7 +51,7 @@ class FGroupTaskStarPUAlgorithm {
protected: protected:
typedef FGroupTaskStarPUAlgorithm<OctreeClass, CellContainerClass, CellClass, KernelClass, ParticleGroupClass, ParticleContainerClass typedef FGroupTaskStarPUAlgorithm<OctreeClass, CellContainerClass, CellClass, KernelClass, ParticleGroupClass, ParticleContainerClass
#ifdef ScalFMM_ENABLE_CUDA_KERNEL #ifdef ScalFMM_ENABLE_CUDA_KERNEL
, CudaCellContainerClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass , CudaCellClass, CudaCellContainerClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass
#endif #endif
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL #ifdef ScalFMM_ENABLE_OPENCL_KERNEL
, OpenCLDeviceWrapperClass , OpenCLDeviceWrapperClass
...@@ -90,7 +91,7 @@ protected: ...@@ -90,7 +91,7 @@ protected:
StarPUCpuWrapperClass cpuWrapper; StarPUCpuWrapperClass cpuWrapper;
#endif #endif
#ifdef ScalFMM_ENABLE_CUDA_KERNEL #ifdef ScalFMM_ENABLE_CUDA_KERNEL
typedef FStarPUCudaWrapper<KernelClass, CudaCellContainerClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass> StarPUCudaWrapperClass; typedef FStarPUCudaWrapper<KernelClass, CudaCellClass, CudaCellContainerClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass> StarPUCudaWrapperClass;
StarPUCudaWrapperClass cudaWrapper; StarPUCudaWrapperClass cudaWrapper;
#endif #endif
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL #ifdef ScalFMM_ENABLE_OPENCL_KERNEL
......
...@@ -33,12 +33,12 @@ ...@@ -33,12 +33,12 @@
#include "FStarPUUtils.hpp" #include "FStarPUUtils.hpp"
template <class KernelClass, class CudaCellGroupClass, template <class KernelClass, class CellClass, class CudaCellGroupClass,
class CudaParticleGroupClass, class CudaParticleContainerClass, class CudaParticleGroupClass, class CudaParticleContainerClass,
class CudaKernelClass> class CudaKernelClass>
class FStarPUCudaWrapper { class FStarPUCudaWrapper {
protected: protected:
typedef FStarPUCudaWrapper<KernelClass, CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass> ThisClass; typedef FStarPUCudaWrapper<KernelClass, CellClass, CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass> ThisClass;
template <class OtherBlockClass> template <class OtherBlockClass>
struct BlockInteractions{ struct BlockInteractions{
...@@ -81,7 +81,7 @@ public: ...@@ -81,7 +81,7 @@ public:
CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CPU_IDX)->kernels[starpu_worker_get_id()]; CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CPU_IDX)->kernels[starpu_worker_get_id()];
FCuda__bottomPassCallback<CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]), FCuda__bottomPassCallback<CellClass,CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]), STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]), (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[1]), STARPU_VARIABLE_GET_ELEMSIZE(buffers[1]),
...@@ -112,7 +112,7 @@ public: ...@@ -112,7 +112,7 @@ public:
CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CPU_IDX)->kernels[starpu_worker_get_id()]; CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CPU_IDX)->kernels[starpu_worker_get_id()];
FCuda__upwardPassCallback<CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]), FCuda__upwardPassCallback<CellClass,CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]), STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
subCellGroupsPtr,subCellGroupsSize, subCellGroupsPtr,subCellGroupsSize,
nbSubCellGroups, idxLevel, kernel, starpu_cuda_get_local_stream()); nbSubCellGroups, idxLevel, kernel, starpu_cuda_get_local_stream());
...@@ -135,7 +135,7 @@ public: ...@@ -135,7 +135,7 @@ public:
CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CPU_IDX)->kernels[starpu_worker_get_id()]; CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CPU_IDX)->kernels[starpu_worker_get_id()];
FCuda__transferInoutPassCallbackMpi<CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]), FCuda__transferInoutPassCallbackMpi<CellClass,CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]), STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]), (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[1]), STARPU_VARIABLE_GET_ELEMSIZE(buffers[1]),
...@@ -158,7 +158,7 @@ public: ...@@ -158,7 +158,7 @@ public:
CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CPU_IDX)->kernels[starpu_worker_get_id()]; CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CPU_IDX)->kernels[starpu_worker_get_id()];
FCuda__transferInPassCallback<CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]), FCuda__transferInPassCallback<CellClass,CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]), STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
idxLevel, kernel, starpu_cuda_get_local_stream()); idxLevel, kernel, starpu_cuda_get_local_stream());
} }
...@@ -179,7 +179,7 @@ public: ...@@ -179,7 +179,7 @@ public: