Commit 1f77bf7c authored by BRAMAS Berenger

Update cuda starpu to use a real class POD

parent df70cc33
This diff is collapsed.
......@@ -6,54 +6,54 @@
#include "../../Utils/FGlobal.hpp"
#include "../FOutOfBlockInteraction.hpp"
/**
 * CUDA-side entry point for the bottom pass (declaration only; the body is not in this header).
 * @param leafCellsPtr   byte buffer holding the leaf cells; its length is leafCellsSize
 * @param containersPtr  byte buffer holding the particle containers; its length is containersSize
 * @param kernel         kernel object forwarded to the implementation
 * @param currentStream  CUDA stream supplied by the caller
 * NOTE(review): the buffers are presumably already device-resident — confirm against the caller.
 */
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__bottomPassCallback(unsigned char* leafCellsPtr, std::size_t leafCellsSize,
unsigned char* containersPtr, std::size_t containersSize,
CudaKernelClass* kernel, cudaStream_t currentStream);
/**
 * CUDA-side entry point for the upward pass (declaration only; the body is not in this header).
 * @param currentCellsPtr    byte buffer holding the current cell group; its length is currentCellsSize
 * @param subCellGroupsPtr   array of 9 byte-buffer pointers for the sub cell groups
 * @param subCellGroupsSize  array of 9 sizes, one per entry of subCellGroupsPtr
 * @param nbSubCellGroups    presumably the number of entries of the two arrays that are in use — TODO confirm
 * @param idxLevel           tree level forwarded to the implementation
 * @param kernel             kernel object forwarded to the implementation
 * @param currentStream      CUDA stream supplied by the caller
 */
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__upwardPassCallback(unsigned char* currentCellsPtr, std::size_t currentCellsSize,
unsigned char* subCellGroupsPtr[9], std::size_t subCellGroupsSize[9],
int nbSubCellGroups, int idxLevel, CudaKernelClass* kernel, cudaStream_t currentStream);
/**
 * CUDA-side entry point for the MPI variant of the transfer in/out pass
 * (declaration only; the body is not in this header).
 * @param currentCellsPtr        byte buffer holding the current cell group; its length is currentCellsSize
 * @param externalCellsPtr       byte buffer holding the external cell group; its length is externalCellsSize
 * @param idxLevel               tree level forwarded to the implementation
 * @param outsideInteractions    array of out-of-block interactions (read-only)
 * @param nbOutsideInteractions  number of entries in outsideInteractions
 * @param kernel                 kernel object forwarded to the implementation
 * @param currentStream          CUDA stream supplied by the caller
 */
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__transferInoutPassCallbackMpi(unsigned char* currentCellsPtr, std::size_t currentCellsSize,
unsigned char* externalCellsPtr, std::size_t externalCellsSize,
int idxLevel, const OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions, CudaKernelClass* kernel, cudaStream_t currentStream);
/**
 * CUDA-side entry point for the transfer pass inside a single cell group
 * (declaration only; the body is not in this header).
 * @param currentCellsPtr  byte buffer holding the current cell group; its length is currentCellsSize
 * @param idxLevel         tree level forwarded to the implementation
 * @param kernel           kernel object forwarded to the implementation
 * @param currentStream    CUDA stream supplied by the caller
 */
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__transferInPassCallback(unsigned char* currentCellsPtr, std::size_t currentCellsSize,
int idxLevel, CudaKernelClass* kernel, cudaStream_t currentStream);
/**
 * CUDA-side entry point for the transfer in/out pass between two cell groups
 * (declaration only; the body is not in this header).
 * @param currentCellsPtr        byte buffer holding the current cell group; its length is currentCellsSize
 * @param externalCellsPtr       byte buffer holding the external cell group; its length is externalCellsSize
 * @param idxLevel               tree level forwarded to the implementation
 * @param outsideInteractions    array of out-of-block interactions (read-only)
 * @param nbOutsideInteractions  number of entries in outsideInteractions
 * @param kernel                 kernel object forwarded to the implementation
 * @param currentStream          CUDA stream supplied by the caller
 */
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__transferInoutPassCallback(unsigned char* currentCellsPtr, std::size_t currentCellsSize,
unsigned char* externalCellsPtr, std::size_t externalCellsSize,
int idxLevel, const OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions, CudaKernelClass* kernel, cudaStream_t currentStream);
/**
 * CUDA-side entry point for the downward pass (declaration only; the body is not in this header).
 * The spelling "downard" is part of the public name; renaming it would require changing
 * every caller at the same time.
 * @param currentCellsPtr    byte buffer holding the current cell group; its length is currentCellsSize
 * @param subCellGroupsPtr   array of 9 byte-buffer pointers for the sub cell groups
 * @param subCellGroupsSize  array of 9 sizes, one per entry of subCellGroupsPtr
 * @param nbSubCellGroups    presumably the number of entries of the two arrays that are in use — TODO confirm
 * @param idxLevel           tree level forwarded to the implementation
 * @param kernel             kernel object forwarded to the implementation
 * @param currentStream      CUDA stream supplied by the caller
 */
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__downardPassCallback(unsigned char* currentCellsPtr, std::size_t currentCellsSize,
unsigned char* subCellGroupsPtr[9], std::size_t subCellGroupsSize[9],
int nbSubCellGroups, int idxLevel, CudaKernelClass* kernel, cudaStream_t currentStream);
/**
 * CUDA-side entry point for the MPI variant of the direct in/out pass
 * (declaration only; the body is not in this header).
 * @param containersPtr          byte buffer holding the particle containers; its length is containersSize
 * @param externalContainersPtr  byte buffer holding the external particle containers; its length is externalContainersSize
 * @param outsideInteractions    array of out-of-block interactions (read-only)
 * @param nbOutsideInteractions  number of entries in outsideInteractions
 * @param treeHeight             height of the tree, forwarded to the implementation
 * @param kernel                 kernel object forwarded to the implementation
 * @param currentStream          CUDA stream supplied by the caller
 */
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__directInoutPassCallbackMpi(unsigned char* containersPtr, std::size_t containersSize,
unsigned char* externalContainersPtr, std::size_t externalContainersSize,
const OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions, const int treeHeight, CudaKernelClass* kernel, cudaStream_t currentStream);
/**
 * CUDA-side entry point for the direct pass inside a single particle group
 * (declaration only; the body is not in this header).
 * @param containersPtr  byte buffer holding the particle containers; its length is containersSize
 * @param treeHeight     height of the tree, forwarded to the implementation
 * @param kernel         kernel object forwarded to the implementation
 * @param currentStream  CUDA stream supplied by the caller
 */
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__directInPassCallback(unsigned char* containersPtr, std::size_t containersSize,
const int treeHeight, CudaKernelClass* kernel, cudaStream_t currentStream);
/**
 * CUDA-side entry point for the direct in/out pass between two particle groups
 * (declaration only; the body is not in this header).
 * @param containersPtr          byte buffer holding the particle containers; its length is containersSize
 * @param externalContainersPtr  byte buffer holding the external particle containers; its length is externalContainersSize
 * @param outsideInteractions    array of out-of-block interactions (read-only)
 * @param nbOutsideInteractions  number of entries in outsideInteractions
 * @param treeHeight             height of the tree, forwarded to the implementation
 * @param kernel                 kernel object forwarded to the implementation
 * @param currentStream          CUDA stream supplied by the caller
 */
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__directInoutPassCallback(unsigned char* containersPtr, std::size_t containersSize,
unsigned char* externalContainersPtr, std::size_t externalContainersSize,
const OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions, const int treeHeight, CudaKernelClass* kernel, cudaStream_t currentStream);
/**
 * CUDA-side entry point for the merge pass (declaration only; the body is not in this header).
 * @param leafCellsPtr   byte buffer holding the leaf cells; its length is leafCellsSize
 * @param containersPtr  byte buffer holding the particle containers; its length is containersSize
 * @param kernel         kernel object forwarded to the implementation
 * @param currentStream  CUDA stream supplied by the caller
 */
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__mergePassCallback(unsigned char* leafCellsPtr, std::size_t leafCellsSize,
unsigned char* containersPtr, std::size_t containersSize,
CudaKernelClass* kernel, cudaStream_t currentStream);
......
#ifndef FCUDAEMPTYCELL_HPP
#define FCUDAEMPTYCELL_HPP
#include "../../Utils/FGlobal.hpp"
#include "../../Containers/FTreeCoordinate.hpp"
#include "../FStarPUDefaultAlign.hpp"
/**
 * Minimal plain-data cell: a Morton index followed by three integer coordinates,
 * aligned on FStarPUDefaultAlign::StructAlign.
 * NOTE(review): the member order presumably has to match the leading fields of the
 * CPU-side cell that is copied to the device — confirm before reordering anything.
 */
struct alignas(FStarPUDefaultAlign::StructAlign) FCudaEmptyCell {
// Morton index of the cell
MortonIndex mortonIndex;
// Tree coordinates of the cell, stored as three ints
int coordinates[3];
};
#endif // FCUDAEMPTYCELL_HPP
......@@ -4,28 +4,29 @@
#include "FCudaGlobal.hpp"
#include "FCudaGroupAttachedLeaf.hpp"
#include "FCudaEmptyCell.hpp"
/**
* This class defines what should be a Cuda kernel.
*/
template <class ContainerClass = FCudaGroupAttachedLeaf<0, int>>
template <class CellClass = FCudaEmptyCell, class ContainerClass = FCudaGroupAttachedLeaf<0, int>>
class FCudaEmptyKernel {
protected:
public:
__device__ void P2M(unsigned char* const /*pole*/, const ContainerClass* const /*particles*/) {
__device__ void P2M(CellClass* /*pole*/, const ContainerClass* const /*particles*/) {
}
__device__ void M2M(unsigned char* const /*pole*/, const unsigned char *const *const /*child*/, const int /*level*/) {
__device__ void M2M(CellClass* /*pole*/, const CellClass* /*child*/[8], const int /*level*/) {
}
__device__ void M2L(unsigned char* const /*pole*/, const unsigned char* /*distantNeighbors*/[343],
__device__ void M2L(CellClass* /*pole*/, const CellClass* /*distantNeighbors*/[343],
const int /*size*/, const int /*level*/) {
}
__device__ void L2L(const unsigned char*const /*local*/, unsigned char* *const /*child*/, const int /*level*/) {
__device__ void L2L(const CellClass* /*local*/, CellClass* /*child*/[8], const int /*level*/) {
}
__device__ void L2P(const unsigned char* const /*local*/, ContainerClass*const /*particles*/){
__device__ void L2P(const CellClass* /*local*/, ContainerClass*const /*particles*/){
}
__device__ void P2P(const int3& ,
......@@ -38,16 +39,6 @@ public:
ContainerClass* const /*directNeighborsParticles*/[27], const int ){
}
__device__ MortonIndex getMortonIndex(const unsigned char* /*cell*/) const{
return 0;
}
__device__ int3 getCoordinate(const unsigned char* /*cell*/) const{
int3 coord;
coord.x = coord.y = coord.z = 0;
return coord;
}
__host__ static FCudaEmptyKernel* InitKernelKernel(void*){
return nullptr;
}
......
......@@ -9,7 +9,7 @@
/**
* @brief The FCudaGroupOfCells class manages the cells in block allocation.
*/
template <const size_t CellClassSize>
template <class CellClass>
class FCudaGroupOfCells {
/** One header is allocated at the beginning of each block */
struct alignas(FStarPUDefaultAlign::StructAlign) BlockHeader{
......@@ -108,14 +108,14 @@ public:
}
/**
 * Return the address of the cell stored for inIndex, or nullptr when exists(inIndex) is false.
 * The position of the cell is read from blockIndexesTable at
 * (inIndex - blockHeader->startingIndex) and multiplied by the size of one cell
 * to obtain a byte offset into blockCells.
 */
__device__ unsigned char* getCell(const MortonIndex inIndex){
if( exists(inIndex) ) return &blockCells[CellClassSize*blockIndexesTable[inIndex-blockHeader->startingIndex]];
__device__ CellClass* getCell(const MortonIndex inIndex){
if( exists(inIndex) ) return (CellClass*)(&blockCells[sizeof(CellClass)*blockIndexesTable[inIndex-blockHeader->startingIndex]]);
else return nullptr;
}
/**
 * Read-only overload: return the address of the cell stored for inIndex,
 * or nullptr when exists(inIndex) is false.
 * The byte offset into blockCells is the size of one cell times the entry of
 * blockIndexesTable at (inIndex - blockHeader->startingIndex).
 * NOTE(review): the typed version casts to a non-const CellClass* before the implicit
 * conversion to the const return type; casting directly to const CellClass* would
 * state the intent more clearly — confirm how blockCells is declared.
 */
__device__ const unsigned char* getCell(const MortonIndex inIndex) const {
if( exists(inIndex) ) return &blockCells[CellClassSize*blockIndexesTable[inIndex-blockHeader->startingIndex]];
__device__ const CellClass* getCell(const MortonIndex inIndex) const {
if( exists(inIndex) ) return (CellClass*)(&blockCells[sizeof(CellClass)*blockIndexesTable[inIndex-blockHeader->startingIndex]]);
else return nullptr;
}
};
......
......@@ -7,126 +7,61 @@
// We need to describe this cell
#include "../../Components/FTestCell.hpp"
/**
 * Mutable device-side view over an FTestCell that lives in a raw byte buffer.
 * The descriptor does not own or copy the cell; it only reinterprets the bytes.
 */
class FTestCellCudaDescriptor {
    FTestCell* cell;

public:
    /** Wrap the bytes at inPtr as an FTestCell. */
    __device__ FTestCellCudaDescriptor(unsigned char* inPtr)
        : cell(reinterpret_cast<FTestCell*>(inPtr)){
    }

    /** Writable access to the upward value of the wrapped cell. */
    __device__ long long int& dataUp(){
        FTestCell& target = *cell;
        return target.dataUp;
    }

    /** Writable access to the downward value of the wrapped cell. */
    __device__ long long int& dataDown(){
        FTestCell& target = *cell;
        return target.dataDown;
    }

    /** Morton index stored in the wrapped cell. */
    __device__ MortonIndex getMortonIndex() const{
        const FTestCell& source = *cell;
        return source.mortonIndex;
    }

    /** Coordinate of the wrapped cell, read as three consecutive ints. */
    __device__ int3 getCoordinate() const{
        const int* const xyz = reinterpret_cast<const int*>(&cell->coordinate);
        int3 result = { xyz[0], xyz[1], xyz[2] };
        return result;
    }
};
/**
 * Read-only device-side view over an FTestCell that lives in a raw byte buffer.
 * The descriptor does not own or copy the cell; it only reinterprets the bytes.
 */
class FTestCellCudaConstDescriptor {
    const FTestCell* cell;

public:
    /** Wrap the bytes at inPtr as a constant FTestCell. */
    __device__ FTestCellCudaConstDescriptor(const unsigned char* inPtr)
        : cell(reinterpret_cast<const FTestCell*>(inPtr)){
    }

    /** Read-only access to the upward value of the wrapped cell. */
    __device__ const long long int& dataUp()const {
        const FTestCell& source = *cell;
        return source.dataUp;
    }

    /** Read-only access to the downward value of the wrapped cell. */
    __device__ const long long int& dataDown()const {
        const FTestCell& source = *cell;
        return source.dataDown;
    }

    /** Morton index stored in the wrapped cell. */
    __device__ MortonIndex getMortonIndex() const{
        const FTestCell& source = *cell;
        return source.mortonIndex;
    }

    /** Coordinate of the wrapped cell, read as three consecutive ints. */
    __device__ int3 getCoordinate() const{
        const int* const xyz = reinterpret_cast<const int*>(&cell->coordinate);
        int3 result = { xyz[0], xyz[1], xyz[2] };
        return result;
    }
};
template< class ContainerClass >
template< class CellClass, class ContainerClass >
class FTestCudaKernels {
public:
/**
 * Before the upward pass: add the number of particles of the leaf to the
 * upward value of its cell.
 * Only the thread with threadIdx.x == 0 performs the update; the others do nothing.
 * @param pole      cell that receives the count
 * @param particles particle container whose getNbParticles() is added
 */
__device__ void P2M(unsigned char* const pole, const ContainerClass* const particles) {
__device__ void P2M(CellClass* pole, const ContainerClass* const particles) {
// The pole represents all the particles below it
if(threadIdx.x == 0){
FTestCellCudaDescriptor cell(pole);
cell.dataUp() += particles->getNbParticles();
pole->dataUp += particles->getNbParticles();
}
}
/**
 * During the upward pass: add the upward value of every existing child to the parent.
 * Only the thread with threadIdx.x == 0 performs the update; the others do nothing.
 * @param pole  parent cell that accumulates the values
 * @param child array of 8 child pointers; null entries are skipped
 * The level argument is unused.
 */
__device__ void M2M(unsigned char* const pole, const unsigned char*const*const child, const int /*level*/) {
__device__ void M2M(CellClass* pole, const CellClass* child[8], const int /*level*/) {
if(threadIdx.x == 0) {
FTestCellCudaDescriptor cell(pole);
// A parent holds the sum of its children
for(int idx = 0 ; idx < 8 ; ++idx){
if(child[idx]){
FTestCellCudaConstDescriptor childCell(child[idx]);
cell.dataUp() += childCell.dataUp();
pole->dataUp += child[idx]->dataUp;
}
}
}
}
/**
 * Before the downward pass: add the upward value of every existing distant
 * neighbor to the downward value of the local cell.
 * Only the thread with threadIdx.x == 0 performs the update; the others do nothing.
 * @param local            cell that accumulates the values
 * @param distantNeighbors array of 343 neighbor pointers; null entries are skipped
 * The size and level arguments are unused.
 */
__device__ void M2L(unsigned char* const local, const unsigned char* distantNeighbors[343], const int /*size*/, const int /*level*/) {
__device__ void M2L(CellClass* local, const CellClass* distantNeighbors[343], const int /*size*/, const int /*level*/) {
if(threadIdx.x == 0) {
FTestCellCudaDescriptor cell(local);
// The local cell receives what the other poles represent
for(int idx = 0 ; idx < 343 ; ++idx){
if(distantNeighbors[idx]){
FTestCellCudaConstDescriptor interCell(distantNeighbors[idx]);
cell.dataDown() += interCell.dataUp();
local->dataDown += distantNeighbors[idx]->dataUp;
}
}
}
}
/**
 * During the downward pass: add the downward value of the parent to every existing child.
 * Only the thread with threadIdx.x == 0 performs the update; the others do nothing.
 * @param local parent cell whose downward value is propagated (read-only)
 * @param child array of 8 child pointers; null entries are skipped
 * The level argument is unused.
 */
__device__ void L2L(const unsigned char*const local, unsigned char**const child, const int /*level*/) {
__device__ void L2L(const CellClass* local, CellClass* child[8], const int /*level*/) {
if(threadIdx.x == 0) {
FTestCellCudaConstDescriptor cell(local);
// Each child receives the contribution of its parent
for(int idx = 0 ; idx < 8 ; ++idx){
if(child[idx]){
FTestCellCudaDescriptor cellChild(child[idx]);
cellChild.dataDown() += cell.dataDown();
child[idx]->dataDown += local->dataDown;
}
}
}
}
/**
 * After the downward pass: add the downward value of the leaf cell to attribute 0
 * of every particle in the container.
 * Only the thread with threadIdx.x == 0 runs the loop, so the particles are
 * updated one after the other by that single thread.
 * @param local     leaf cell whose downward value is applied (read-only)
 * @param particles container whose attribute 0 (long long int) is updated in place
 */
__device__ void L2P(const unsigned char* const local, ContainerClass*const particles){
__device__ void L2P(const CellClass* local, ContainerClass*const particles){
if(threadIdx.x == 0) {
FTestCellCudaConstDescriptor cell(local);
// The particles receive the contribution of their parent cell
long long int*const particlesAttributes = particles->template getAttribute<0>();
for(int idxPart = 0 ; idxPart < particles->getNbParticles() ; ++idxPart){
particlesAttributes[idxPart] += cell.dataDown();
particlesAttributes[idxPart] += local->dataDown;
}
}
}
......@@ -175,16 +110,6 @@ public:
}
}
__device__ MortonIndex getMortonIndex(const unsigned char* cell) const{
FTestCellCudaConstDescriptor cellAccess(cell);
return cellAccess.getMortonIndex();
}
__device__ int3 getCoordinate(const unsigned char* cell) const{
FTestCellCudaConstDescriptor cellAccess(cell);
return cellAccess.getCoordinate();
}
__host__ static FTestCudaKernels* InitKernelKernel(void*){
return nullptr;
}
......
......@@ -7,6 +7,14 @@
class FCudaTreeCoordinate {
public:
/**
 * Build an int3 from an array of three ints.
 * @param coordinate array whose entries 0, 1 and 2 become x, y and z respectively
 * @return the int3 holding the same three values
 */
__device__ static int3 ConvertCoordinate(const int coordinate[3]) {
    int3 result = { coordinate[0], coordinate[1], coordinate[2] };
    return result;
}
__device__ static int3 GetPositionFromMorton(MortonIndex inIndex, const int inLevel){
MortonIndex mask = 0x1LL;
......
......@@ -30,6 +30,7 @@
#include "Cuda/FCudaGroupAttachedLeaf.hpp"
#include "Cuda/FCudaGroupOfParticles.hpp"
#include "Cuda/FCudaGroupOfCells.hpp"
#include "Cuda/FCudaEmptyCell.hpp"
#endif
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
#include "FStarPUOpenClWrapper.hpp"
......@@ -39,7 +40,7 @@
template <class OctreeClass, class CellContainerClass, class CellClass, class KernelClass, class ParticleGroupClass, class ParticleContainerClass
#ifdef ScalFMM_ENABLE_CUDA_KERNEL
, class CudaCellContainerClass = FCudaGroupOfCells<0>, class CudaParticleGroupClass = FCudaGroupOfParticles<0, int>, class CudaParticleContainerClass = FCudaGroupAttachedLeaf<0, int>,
, class CudaCellClass = FCudaEmptyCell, class CudaCellContainerClass = FCudaGroupOfCells<FCudaEmptyCell>, class CudaParticleGroupClass = FCudaGroupOfParticles<0, int>, class CudaParticleContainerClass = FCudaGroupAttachedLeaf<0, int>,
class CudaKernelClass = FCudaEmptyKernel<>
#endif
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
......@@ -50,7 +51,7 @@ class FGroupTaskStarPUAlgorithm {
protected:
typedef FGroupTaskStarPUAlgorithm<OctreeClass, CellContainerClass, CellClass, KernelClass, ParticleGroupClass, ParticleContainerClass
#ifdef ScalFMM_ENABLE_CUDA_KERNEL
, CudaCellContainerClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass
, CudaCellClass, CudaCellContainerClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass
#endif
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
, OpenCLDeviceWrapperClass
......@@ -90,7 +91,7 @@ protected:
StarPUCpuWrapperClass cpuWrapper;
#endif
#ifdef ScalFMM_ENABLE_CUDA_KERNEL
typedef FStarPUCudaWrapper<KernelClass, CudaCellContainerClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass> StarPUCudaWrapperClass;
typedef FStarPUCudaWrapper<KernelClass, CudaCellClass, CudaCellContainerClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass> StarPUCudaWrapperClass;
StarPUCudaWrapperClass cudaWrapper;
#endif
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
......
......@@ -33,12 +33,12 @@
#include "FStarPUUtils.hpp"
template <class KernelClass, class CudaCellGroupClass,
template <class KernelClass, class CellClass, class CudaCellGroupClass,
class CudaParticleGroupClass, class CudaParticleContainerClass,
class CudaKernelClass>
class FStarPUCudaWrapper {
protected:
typedef FStarPUCudaWrapper<KernelClass, CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass> ThisClass;
typedef FStarPUCudaWrapper<KernelClass, CellClass, CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass> ThisClass;
template <class OtherBlockClass>
struct BlockInteractions{
......@@ -81,7 +81,7 @@ public:
CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CPU_IDX)->kernels[starpu_worker_get_id()];
FCuda__bottomPassCallback<CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
FCuda__bottomPassCallback<CellClass,CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[1]),
......@@ -112,7 +112,7 @@ public:
CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CPU_IDX)->kernels[starpu_worker_get_id()];
FCuda__upwardPassCallback<CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
FCuda__upwardPassCallback<CellClass,CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
subCellGroupsPtr,subCellGroupsSize,
nbSubCellGroups, idxLevel, kernel, starpu_cuda_get_local_stream());
......@@ -135,7 +135,7 @@ public:
CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CPU_IDX)->kernels[starpu_worker_get_id()];
FCuda__transferInoutPassCallbackMpi<CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
FCuda__transferInoutPassCallbackMpi<CellClass,CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[1]),
......@@ -158,7 +158,7 @@ public:
CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CPU_IDX)->kernels[starpu_worker_get_id()];
FCuda__transferInPassCallback<CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
FCuda__transferInPassCallback<CellClass,CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
idxLevel, kernel, starpu_cuda_get_local_stream());
}
......@@ -179,7 +179,7 @@ public:
CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CPU_IDX)->kernels[starpu_worker_get_id()];
FCuda__transferInoutPassCallback<CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
FCuda__transferInoutPassCallback<CellClass,CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[1]),
......@@ -210,7 +210,7 @@ public:
CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CPU_IDX)->kernels[starpu_worker_get_id()];
FCuda__downardPassCallback<CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
FCuda__downardPassCallback<CellClass,CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
subCellGroupsPtr,subCellGroupsSize,
nbSubCellGroups, idxLevel, kernel, starpu_cuda_get_local_stream());
......@@ -232,7 +232,7 @@ public:
CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CPU_IDX)->kernels[starpu_worker_get_id()];
FCuda__directInoutPassCallbackMpi<CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
FCuda__directInoutPassCallbackMpi<CellClass,CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[1]),
......@@ -252,7 +252,7 @@ public:
starpu_codelet_unpack_args(cl_arg, &worker);
CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CPU_IDX)->kernels[starpu_worker_get_id()];
FCuda__directInPassCallback<CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
FCuda__directInPassCallback<CellClass,CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
worker->get<ThisClass>(FSTARPU_CPU_IDX)->treeHeight, kernel, starpu_cuda_get_local_stream());
}
......@@ -269,7 +269,7 @@ public:
CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CPU_IDX)->kernels[starpu_worker_get_id()];
FCuda__directInoutPassCallback<CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
FCuda__directInoutPassCallback<CellClass,CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[1]),
......@@ -293,7 +293,7 @@ public:
CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CPU_IDX)->kernels[starpu_worker_get_id()];
FCuda__mergePassCallback<CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
FCuda__mergePassCallback<CellClass,CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[1]),
......
#ifndef FTESTCELLPOD_HPP
#define FTESTCELLPOD_HPP
#include "../Utils/FGlobal.hpp"
#include "../Containers/FTreeCoordinate.hpp"
#include "FStarPUDefaultAlign.hpp"
/**
 * Plain-data storage of a test cell, aligned on FStarPUDefaultAlign::StructAlign.
 * FTestCellPOD wraps exactly one instance of this struct, and a static_assert after
 * that class requires both types to have the same size, so members must not be
 * added to one without the other.
 */
struct alignas(FStarPUDefaultAlign::StructAlign) FTestCellPODCore {
// Morton index of the cell
MortonIndex mortonIndex;
// Tree coordinates of the cell, stored as three ints
int coordinates[3];
// Values accumulated during the upward and the downward pass
long long int dataUp, dataDown;
};
/**
 * Test cell whose entire state is one FTestCellPODCore member, so the object can be
 * handled as plain data while still offering the usual cell accessors.
 *
 * The class adds no data member of its own: a static_assert after the class checks
 * that its size equals the size of FTestCellPODCore.
 */
class alignas(FStarPUDefaultAlign::StructAlign) FTestCellPOD {
protected:
    /** All the data of the cell (index, coordinates, up and down values) */
    FTestCellPODCore data;

public:
    /** Build a cell with every field set to zero */
    FTestCellPOD() {
        data.mortonIndex = (0);
        data.dataUp = (0);
        data.dataDown = (0);
        data.coordinates[0] = 0;
        data.coordinates[1] = 0;
        data.coordinates[2] = 0;
    }

    /** To get the morton index */
    MortonIndex getMortonIndex() const {
        return data.mortonIndex;
    }

    /** To set the morton index */
    void setMortonIndex(const MortonIndex inMortonIndex) {
        data.mortonIndex = inMortonIndex;
    }

    /** To get the position */
    FTreeCoordinate getCoordinate() const {
        return FTreeCoordinate(data.coordinates[0],
                               data.coordinates[1], data.coordinates[2]);
    }

    /** To set the position */
    void setCoordinate(const FTreeCoordinate& inCoordinate) {
        data.coordinates[0] = inCoordinate.getX();
        data.coordinates[1] = inCoordinate.getY();
        data.coordinates[2] = inCoordinate.getZ();
    }

    /** To set the position from 3 ints */
    void setCoordinate(const int inX, const int inY, const int inZ) {
        data.coordinates[0] = inX;
        data.coordinates[1] = inY;
        data.coordinates[2] = inZ;
    }

    /** Value computed during the upward pass */
    long long int getDataUp() const {
        return data.dataUp;
    }

    /** Set the value of the upward pass */
    void setDataUp(const long long int inData){
        data.dataUp = inData;
    }

    /** Value computed during the downward pass */
    long long int getDataDown() const {
        return data.dataDown;
    }

    /** Set the value of the downward pass */
    void setDataDown(const long long int inData){
        data.dataDown = inData;
    }

    /** Reset the up and down values to zero; index and coordinates are kept */
    void resetToInitialState(){
        data.dataDown = 0;
        data.dataUp = 0;
    }

    /////////////////////////////////////////////////

    /** Save the current cell in a buffer (index, coordinates, then down and up values) */
    template <class BufferWriterClass>
    void save(BufferWriterClass& buffer) const{
        buffer << data.mortonIndex << data.coordinates[0]
               << data.coordinates[1] << data.coordinates[2];
        buffer << data.dataDown << data.dataUp;
    }

    /** Restore the current cell from a buffer, in the order used by save() */
    template <class BufferReaderClass>
    void restore(BufferReaderClass& buffer){
        buffer >> data.mortonIndex >> data.coordinates[0]
               >> data.coordinates[1] >> data.coordinates[2];
        buffer >> data.dataDown >> data.dataUp;
    }

    /** Number of bytes written by save(): the sum of the sizes of the saved fields */
    int getSavedSize() const {
        return int(sizeof(data.mortonIndex) + sizeof(data.coordinates[0]) +
                   sizeof(data.coordinates[1]) + sizeof(data.coordinates[2]) +
                   sizeof(data.dataDown) + sizeof(data.dataUp));
    }

    /////////////////////////////////////////////////

    /** Serialize only up data in a buffer */
    template <class BufferWriterClass>
    void serializeUp(BufferWriterClass& buffer) const {
        buffer << data.dataUp;
    }

    /** Deserialize only up data from a buffer */
    template <class BufferReaderClass>
    void deserializeUp(BufferReaderClass& buffer){
        buffer >> data.dataUp;
    }

    /** Serialize only down data in a buffer */
    template <class BufferWriterClass>
    void serializeDown(BufferWriterClass& buffer) const {
        buffer << data.dataDown;
    }

    /** Deserialize only down data from a buffer */
    template <class BufferReaderClass>
    void deserializeDown(BufferReaderClass& buffer){
        buffer >> data.dataDown;
    }

    /**
     * Number of bytes written by serializeDown().
     * Const like getSavedSize(), so it can be called on a constant cell.
     */
    int getSavedSizeDown() const {
        return int(sizeof(data.dataDown));
    }

    /**
     * Number of bytes written by serializeUp().
     * Const like getSavedSize(), so it can be called on a constant cell.
     */
    int getSavedSizeUp() const {
        return int(sizeof(data.dataUp));
    }
};
static_assert(sizeof(FTestCellPODCore) == sizeof(FTestCellPOD), "Core should be equal to cell class size");
#endif // FTESTCELLPOD_HPP
......@@ -3,6 +3,7 @@
#include "../../Utils/FGlobal.hpp"
#include "../../Components/FTestCell.hpp"
#include "../FStarPUDefaultAlign.hpp"
#include "FTextReplacer.hpp"
struct FTestCell_Alignement{
......@@ -34,6 +35,7 @@ public:
kernelfile.replaceAll("___FCellDownOffset___", FTestCell_Alignement::dataDown);
kernelfile.replaceAll("___FCellMortonOffset___", FTestCell_Alignement::mindex);
kernelfile.replaceAll("___FCellCoordinateOffset___", FTestCell_Alignement::coord);
kernelfile.replaceAll("___DefaultStructAlign___", FStarPUDefaultAlign::StructAlign);
dim = 1;
}
......
......@@ -39,12 +39,14 @@
#include "../../Src/GroupTree/FStarPUKernelCapacities.hpp"
#include "../../Src/GroupTree/FTestCellPOD.hpp"
//#include "../../Src/GroupTree/Cuda/FCudaTestKernels.hpp"
//#include "../../Src/GroupTree/Cuda/FCudaGroupOfParticles.hpp"
//#include "../../Src/GroupTree/Cuda/FCudaGroupAttachedLeaf.hpp"
//#include "../../Src/GroupTree/Cuda/FCudaGroupOfCells.hpp"
template <class ContainerClass>
template <class CellClass, class ContainerClass>
class FTestCudaKernels;
template <unsigned NbAttributesPerParticle, class AttributeClass>
......@@ -53,7 +55,7 @@ class FCudaGroupAttachedLeaf;
template <unsigned NbAttributesPerParticle, class AttributeClass>
class FCudaGroupOfParticles;