Commit c6ac1155 authored by BRAMAS Berenger's avatar BRAMAS Berenger
Browse files

make cuda compile but cannot test on my laptop

parent 802eb4f4
......@@ -30,7 +30,7 @@
#include "../Cuda/FCudaGroupAttachedLeaf.hpp"
#include "../Cuda/FCudaGroupOfParticles.hpp"
#include "../Cuda/FCudaGroupOfCells.hpp"
#include "../Cuda/FCudaEmptyCell.hpp"
#include "../Cuda/FCudaEmptyCellSymb.hpp"
#endif
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
#include "../StarPUUtils/FStarPUOpenClWrapper.hpp"
......@@ -40,7 +40,8 @@
template <class OctreeClass, class CellContainerClass, class KernelClass, class ParticleGroupClass, class StarPUCpuWrapperClass
#ifdef ScalFMM_ENABLE_CUDA_KERNEL
, class StarPUCudaWrapperClass = FStarPUCudaWrapper<KernelClass, FCudaEmptyCell, FCudaGroupOfCells<FCudaEmptyCell>, FCudaGroupOfParticles<0, int>, FCudaGroupAttachedLeaf<0, int>, FCudaEmptyKernel<>>
, class StarPUCudaWrapperClass = FStarPUCudaWrapper<KernelClass, FCudaEmptyCellSymb, int, int, FCudaGroupOfCells<FCudaEmptyCellSymb, int, int>,
FCudaGroupOfParticles<0, 0, int>, FCudaGroupAttachedLeaf<0, 0, int>, FCudaEmptyKernel>
#endif
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
, class StarPUOpenClWrapperClass = FStarPUOpenClWrapper<KernelClass, FOpenCLDeviceWrapper<KernelClass>>
......
//@SCALFMM_PRIVATE
#ifndef FCUDACOMPLETECELL_HPP
#define FCUDACOMPLETECELL_HPP
template <class SymboleCellClass, class PoleCellClass, class LocalCellClass>
struct alignas(FStarPUDefaultAlign::StructAlign) FCudaCompositeCell {
__device__ FCudaCompositeCell()
: symb(nullptr), up(nullptr), down(nullptr){
}
SymboleCellClass* symb;
PoleCellClass* up;
LocalCellClass* down;
};
#endif // FCUDACOMPLETECELL_HPP
This diff is collapsed.
......@@ -7,56 +7,80 @@
#include "../Core/FOutOfBlockInteraction.hpp"
#include "FCudaStructParams.hpp"
template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__bottomPassCallback(unsigned char* leafCellsPtr, std::size_t leafCellsSize,
template <class SymboleCellClass, class PoleCellClass, class LocalCellClass,
class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__bottomPassCallback(unsigned char* leafCellsPtr, std::size_t leafCellsSize, unsigned char* leafCellsUpPtr,
unsigned char* containersPtr, std::size_t containersSize,
CudaKernelClass* kernel, cudaStream_t currentStream);
template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__upwardPassCallback(unsigned char* currentCellsPtr, std::size_t currentCellsSize,
template <class SymboleCellClass, class PoleCellClass, class LocalCellClass,
class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__upwardPassCallback(
unsigned char* currentCellsPtr, std::size_t currentCellsSize, unsigned char* currentCellsUpPtr,
FCudaParams<unsigned char*,9> subCellGroupsPtr, FCudaParams<std::size_t, 9> subCellGroupsSize,
FCudaParams<unsigned char*,9> subCellGroupsUpPtr,
int nbSubCellGroups, int idxLevel, CudaKernelClass* kernel, cudaStream_t currentStream);
template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__transferInoutPassCallbackMpi(unsigned char* currentCellsPtr, std::size_t currentCellsSize,
unsigned char* externalCellsPtr, std::size_t externalCellsSize,
template <class SymboleCellClass, class PoleCellClass, class LocalCellClass,
class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__transferInoutPassCallbackMpi(
unsigned char* currentCellsPtr, std::size_t currentCellsSize, unsigned char* currentCellsDownPtr,
unsigned char* externalCellsPtr, std::size_t externalCellsSize, unsigned char* externalCellsUpPtr,
int idxLevel, const OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions, CudaKernelClass* kernel, cudaStream_t currentStream);
template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__transferInPassCallback(unsigned char* currentCellsPtr, std::size_t currentCellsSize,
template <class SymboleCellClass, class PoleCellClass, class LocalCellClass,
class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__transferInPassCallback(
unsigned char* currentCellsPtr, std::size_t currentCellsSize,
unsigned char* currentCellsUpPtr, unsigned char* currentCellsDownPtr,
int idxLevel, CudaKernelClass* kernel, cudaStream_t currentStream);
template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__transferInoutPassCallback(unsigned char* currentCellsPtr, std::size_t currentCellsSize,
template <class SymboleCellClass, class PoleCellClass, class LocalCellClass,
class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__transferInoutPassCallback(
unsigned char* currentCellsPtr, std::size_t currentCellsSize,
unsigned char* currentCellsUpPtr, unsigned char* currentCellsDownPtr,
unsigned char* externalCellsPtr, std::size_t externalCellsSize,
unsigned char* externalCellsUpPtr, unsigned char* externalCellsDownPtr,
int idxLevel, const OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions, CudaKernelClass* kernel, cudaStream_t currentStream);
template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__downardPassCallback(unsigned char* currentCellsPtr, std::size_t currentCellsSize,
template <class SymboleCellClass, class PoleCellClass, class LocalCellClass,
class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__downardPassCallback(
unsigned char* currentCellsPtr, std::size_t currentCellsSize, unsigned char* currentCellsDownPtr,
FCudaParams<unsigned char*,9> subCellGroupsPtr, FCudaParams<std::size_t,9> subCellGroupsSize,
FCudaParams<unsigned char*,9> subCellGroupsDownPtr,
int nbSubCellGroups, int idxLevel, CudaKernelClass* kernel, cudaStream_t currentStream);
template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__directInoutPassCallbackMpi(unsigned char* containersPtr, std::size_t containersSize,
template <class SymboleCellClass, class PoleCellClass, class LocalCellClass,
class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__directInoutPassCallbackMpi(
unsigned char* containersPtr, std::size_t containersSize, unsigned char* containersDownPtr,
unsigned char* externalContainersPtr, std::size_t externalContainersSize,
const OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions, const int treeHeight, CudaKernelClass* kernel, cudaStream_t currentStream);
template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__directInPassCallback(unsigned char* containersPtr, std::size_t containersSize,
template <class SymboleCellClass, class PoleCellClass, class LocalCellClass,
class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__directInPassCallback(
unsigned char* containersPtr, std::size_t containersSize, unsigned char* containersDownPtr,
const int treeHeight, CudaKernelClass* kernel, cudaStream_t currentStream);
template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__directInoutPassCallback(unsigned char* containersPtr, std::size_t containersSize,
unsigned char* externalContainersPtr, std::size_t externalContainersSize,
template <class SymboleCellClass, class PoleCellClass, class LocalCellClass,
class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__directInoutPassCallback(
unsigned char* containersPtr, std::size_t containersSize, unsigned char* containersDownPtr,
unsigned char* externalContainersPtr, std::size_t externalContainersSize, unsigned char* externalContainersDownPtr,
const OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions, const int treeHeight, CudaKernelClass* kernel, cudaStream_t currentStream);
template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__mergePassCallback(unsigned char* leafCellsPtr, std::size_t leafCellsSize,
unsigned char* containersPtr, std::size_t containersSize,
template <class SymboleCellClass, class PoleCellClass, class LocalCellClass,
class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__mergePassCallback(
unsigned char* leafCellsPtr, std::size_t leafCellsSize, unsigned char* leafCellsDownPtr,
unsigned char* containersPtr, std::size_t containersSize, unsigned char* containersDownPtr,
CudaKernelClass* kernel, cudaStream_t currentStream);
template <class CudaKernelClass>
......
#ifndef FCUDAEMPTYCELL_HPP
#define FCUDAEMPTYCELL_HPP
#ifndef FCUDAEMPTYCELLSYMB_HPP
#define FCUDAEMPTYCELLSYMB_HPP
#include "../../Utils/FGlobal.hpp"
#include "../../Containers/FTreeCoordinate.hpp"
#include "../StarPUUtils/FStarPUDefaultAlign.hpp"
struct alignas(FStarPUDefaultAlign::StructAlign) FCudaEmptyCell {
struct alignas(FStarPUDefaultAlign::StructAlign) FCudaEmptyCellSymb {
MortonIndex mortonIndex;
int coordinates[3];
};
#endif // FCUDAEMPTYCELL_HPP
#endif // FCUDAEMPTYCELLSYMB_HPP
......@@ -4,29 +4,32 @@
#include "FCudaGlobal.hpp"
#include "FCudaGroupAttachedLeaf.hpp"
#include "FCudaEmptyCell.hpp"
#include "FCudaEmptyCellSymb.hpp"
#include "FCudaCompositeCell.hpp"
/**
* This class defines what should be a Cuda kernel.
*/
template <class CellClass = FCudaEmptyCell, class ContainerClass = FCudaGroupAttachedLeaf<0, int>>
class FCudaEmptyKernel {
protected:
public:
__device__ void P2M(CellClass* /*pole*/, const ContainerClass* const /*particles*/) {
typedef FCudaGroupAttachedLeaf<0,0,int> ContainerClass;
typedef FCudaCompositeCell<FCudaEmptyCellSymb,int,int> CellClass;
__device__ void P2M(CellClass /*pole*/, const ContainerClass* const /*particles*/) {
}
__device__ void M2M(CellClass* /*pole*/, const CellClass* /*child*/[8], const int /*level*/) {
__device__ void M2M(CellClass /*pole*/, const CellClass /*child*/[8], const int /*level*/) {
}
__device__ void M2L(CellClass* /*pole*/, const CellClass* /*distantNeighbors*/[343],
__device__ void M2L(CellClass /*pole*/, const CellClass /*distantNeighbors*/[343],
const int /*size*/, const int /*level*/) {
}
__device__ void L2L(const CellClass* /*local*/, CellClass* /*child*/[8], const int /*level*/) {
__device__ void L2L(const CellClass /*local*/, CellClass /*child*/[8], const int /*level*/) {
}
__device__ void L2P(const CellClass* /*local*/, ContainerClass*const /*particles*/){
__device__ void L2P(const CellClass /*local*/, ContainerClass*const /*particles*/){
}
__device__ void P2P(const int3& ,
......
......@@ -4,7 +4,7 @@
#include "FCudaGlobal.hpp"
template <unsigned NbAttributesPerParticle, class AttributeClass = FReal>
template <unsigned NbSymbAttributes, unsigned NbAttributesPerParticle, class AttributeClass = FReal>
class FCudaGroupAttachedLeaf {
protected:
//< Nb of particles in the current leaf
......@@ -12,13 +12,13 @@ protected:
//< Pointers to the positions of the particles
FReal* positionsPointers[3];
//< Pointers to the attributes of the particles
AttributeClass* attributes[NbAttributesPerParticle];
AttributeClass* attributes[NbSymbAttributes+NbAttributesPerParticle];
public:
/** Empty constructor to point to nothing */
__device__ FCudaGroupAttachedLeaf() : nbParticles(-1) {
memset(positionsPointers, 0, sizeof(FReal*) * 3);
memset(attributes, 0, sizeof(AttributeClass*) * NbAttributesPerParticle);
memset(attributes, 0, sizeof(AttributeClass*) * (NbSymbAttributes+NbAttributesPerParticle));
}
/**
......@@ -37,9 +37,19 @@ public:
positionsPointers[1] = reinterpret_cast<FReal*>(reinterpret_cast<unsigned char*>(inPositionBuffer) + inLeadingPosition);
positionsPointers[2] = reinterpret_cast<FReal*>(reinterpret_cast<unsigned char*>(inPositionBuffer) + inLeadingPosition*2);
unsigned char* symAttributes = reinterpret_cast<unsigned char*>(reinterpret_cast<unsigned char*>(inPositionBuffer) + inLeadingPosition*3);
for(unsigned idxAttribute = 0 ; idxAttribute < NbSymbAttributes ; ++idxAttribute){
attributes[idxAttribute] = reinterpret_cast<AttributeClass*>(symAttributes + idxAttribute*inLeadingAttributes);
}
// Redirect pointers to data
for(unsigned idxAttribute = 0 ; idxAttribute < NbAttributesPerParticle ; ++idxAttribute){
attributes[idxAttribute] = reinterpret_cast<AttributeClass*>(reinterpret_cast<unsigned char*>(inAttributesBuffer) + idxAttribute*inLeadingAttributes);
if(inAttributesBuffer){
for(unsigned idxAttribute = 0 ; idxAttribute < NbAttributesPerParticle ; ++idxAttribute){
attributes[idxAttribute+NbSymbAttributes] = reinterpret_cast<AttributeClass*>(reinterpret_cast<unsigned char*>(inAttributesBuffer) + idxAttribute*inLeadingAttributes);
}
}
else{
memset(&attributes[NbSymbAttributes], 0, sizeof(AttributeClass*)*NbAttributesPerParticle);
}
}
......
......@@ -3,13 +3,13 @@
#define FCUDAGROUPOFCELLS_HPP
#include "FCudaGlobal.hpp"
#include "FCudaCompositeCell.hpp"
#include "../StarPUUtils/FStarPUDefaultAlign.hpp"
/**
* @brief The FCudaGroupOfCells class manages the cells in block allocation.
*/
template <class CellClass>
template <class SymboleCellClass, class PoleCellClass, class LocalCellClass>
class FCudaGroupOfCells {
/** One header is allocated at the beginning of each block */
struct alignas(FStarPUDefaultAlign::StructAlign) BlockHeader{
......@@ -30,23 +30,34 @@ protected:
//< Pointer to the indexes table inside the block memory
int* blockIndexesTable;
//< Pointer to the cells inside the block memory
unsigned char* blockCells;
SymboleCellClass* blockCells;
//< This value is for not used cells
static const MortonIndex CellIsEmptyFlag = -1;
//< The multipole data
PoleCellClass* cellMultipoles;
//< The local data
LocalCellClass* cellLocals;
public:
typedef FCudaCompositeCell<SymboleCellClass, PoleCellClass, LocalCellClass> CompleteCellClass;
__device__ FCudaGroupOfCells()
: allocatedMemoryInByte(0), memoryBuffer(nullptr),
blockHeader(nullptr), blockIndexesTable(nullptr), blockCells(nullptr){
blockHeader(nullptr), blockIndexesTable(nullptr), blockCells(nullptr),
cellMultipoles(nullptr), cellLocals(nullptr){
}
__device__ void reset(unsigned char* inBuffer, const size_t inAllocatedMemoryInByte){
__device__ void reset(unsigned char* inBuffer, const size_t inAllocatedMemoryInByte,
unsigned char* inCellMultipoles, unsigned char* inCellLocals){
// Move the pointers to the correct position
allocatedMemoryInByte = (inAllocatedMemoryInByte);
memoryBuffer = (inBuffer);
blockHeader = reinterpret_cast<BlockHeader*>(memoryBuffer);
blockIndexesTable = reinterpret_cast<int*>(memoryBuffer+sizeof(BlockHeader));
blockCells = reinterpret_cast<unsigned char*>(memoryBuffer+sizeof(BlockHeader)+(blockHeader->blockIndexesTableSize*sizeof(int)));
blockCells = reinterpret_cast<SymboleCellClass*>(memoryBuffer+sizeof(BlockHeader)+(blockHeader->blockIndexesTableSize*sizeof(int)));
cellMultipoles = (PoleCellClass*)inCellMultipoles;
cellLocals = (LocalCellClass*)inCellLocals;
}
/**
......@@ -54,27 +65,17 @@ public:
* @param inBuffer
* @param inAllocatedMemoryInByte
*/
__device__ FCudaGroupOfCells(unsigned char* inBuffer, const size_t inAllocatedMemoryInByte)
__device__ FCudaGroupOfCells(unsigned char* inBuffer, const size_t inAllocatedMemoryInByte,
unsigned char* inCellMultipoles, unsigned char* inCellLocals)
: allocatedMemoryInByte(inAllocatedMemoryInByte), memoryBuffer(inBuffer),
blockHeader(nullptr), blockIndexesTable(nullptr), blockCells(nullptr){
blockHeader(nullptr), blockIndexesTable(nullptr), blockCells(nullptr),
cellMultipoles(nullptr), cellLocals(nullptr){
// Move the pointers to the correct position
blockHeader = reinterpret_cast<BlockHeader*>(memoryBuffer);
blockIndexesTable = reinterpret_cast<int*>(memoryBuffer+sizeof(BlockHeader));
blockCells = reinterpret_cast<unsigned char*>(memoryBuffer+sizeof(BlockHeader)+(blockHeader->blockIndexesTableSize*sizeof(int)));
}
/** Call the destructor of cells and dealloc block memory */
__device__ ~FCudaGroupOfCells(){
}
/** Give access to the buffer to send the data */
__device__ const unsigned char* getRawBuffer() const{
return memoryBuffer;
}
/** The the size of the allocated buffer */
__device__ int getBufferSizeInByte() const {
return allocatedMemoryInByte;
blockCells = reinterpret_cast<SymboleCellClass*>(memoryBuffer+sizeof(BlockHeader)+(blockHeader->blockIndexesTableSize*sizeof(int)));
cellMultipoles = (PoleCellClass*)inCellMultipoles;
cellLocals = (LocalCellClass*)inCellLocals;
}
/** The index of the fist cell (set from the constructor) */
......@@ -108,16 +109,65 @@ public:
}
/** Return the address of the cell if it exists (or NULL) */
__device__ CellClass* getCell(const MortonIndex inIndex){
if( exists(inIndex) ) return (CellClass*)(&blockCells[sizeof(CellClass)*blockIndexesTable[inIndex-blockHeader->startingIndex]]);
else return nullptr;
__device__ CompleteCellClass getCompleteCell(const MortonIndex inIndex){
//FAssertLF(cellMultipoles && cellLocals);
if( exists(inIndex) ){
CompleteCellClass cell;
const int cellPos = blockIndexesTable[inIndex-blockHeader->startingIndex];
cell.symb = &blockCells[cellPos];
cell.up = &cellMultipoles[cellPos];
cell.down = &cellLocals[cellPos];
return cell;
}
else{
CompleteCellClass cell;
cell.symb = nullptr;
cell.up = nullptr;
cell.down = nullptr;
return cell;
}
}
/** Return the address of the cell if it exists (or NULL) */
__device__ CompleteCellClass getUpCell(const MortonIndex inIndex){
//FAssertLF(cellMultipoles);
if( exists(inIndex) ){
CompleteCellClass cell;
const int cellPos = blockIndexesTable[inIndex-blockHeader->startingIndex];
cell.symb = &blockCells[cellPos];
cell.up = &cellMultipoles[cellPos];
cell.down = nullptr;
return cell;
}
else{
CompleteCellClass cell;
cell.symb = nullptr;
cell.up = nullptr;
cell.down = nullptr;
return cell;
}
}
/** Return the address of the cell if it exists (or NULL) */
__device__ const CellClass* getCell(const MortonIndex inIndex) const {
if( exists(inIndex) ) return (CellClass*)(&blockCells[sizeof(CellClass)*blockIndexesTable[inIndex-blockHeader->startingIndex]]);
else return nullptr;
__device__ CompleteCellClass getDownCell(const MortonIndex inIndex){
//FAssertLF(cellLocals);
if( exists(inIndex) ){
CompleteCellClass cell;
const int cellPos = blockIndexesTable[inIndex-blockHeader->startingIndex];
cell.symb = &blockCells[cellPos];
cell.up = nullptr;
cell.down = &cellLocals[cellPos];
return cell;
}
else{
CompleteCellClass cell;
cell.symb = nullptr;
cell.up = nullptr;
cell.down = nullptr;
return cell;
}
}
};
#endif // FCUDAGROUPOFCELLS_HPP
......
......@@ -5,7 +5,7 @@
#include "FCudaGlobal.hpp"
#include "../StarPUUtils/FStarPUDefaultAlign.hpp"
template <unsigned NbAttributesPerParticle, class AttributeClass = FReal>
template <unsigned NbSymbAttributes, unsigned NbAttributesPerParticle, class AttributeClass = FReal>
class FCudaGroupOfParticles {
/** One header is allocated at the beginning of each block */
struct alignas(FStarPUDefaultAlign::StructAlign) BlockHeader{
......@@ -66,7 +66,8 @@ protected:
FReal* particlePosition[3];
//< Pointers to the particles data inside the block memory
AttributeClass* particleAttributes[NbAttributesPerParticle];
AttributeClass* attributesBuffer;
AttributeClass* particleAttributes[NbSymbAttributes+NbAttributesPerParticle];
public:
/**
......@@ -74,9 +75,11 @@ public:
* @param inBuffer
* @param inAllocatedMemoryInByte
*/
__device__ FCudaGroupOfParticles(unsigned char* inBuffer, const size_t inAllocatedMemoryInByte)
__device__ FCudaGroupOfParticles(unsigned char* inBuffer, const size_t inAllocatedMemoryInByte,
unsigned char* inAttributes)
: allocatedMemoryInByte(inAllocatedMemoryInByte), memoryBuffer(inBuffer),
blockHeader(nullptr), blockIndexesTable(nullptr), leafHeader(nullptr), nbParticlesInGroup(0){
blockHeader(nullptr), blockIndexesTable(nullptr), leafHeader(nullptr), nbParticlesInGroup(0),
attributesBuffer(nullptr){
// Move the pointers to the correct position
blockHeader = reinterpret_cast<BlockHeader*>(memoryBuffer);
blockIndexesTable = reinterpret_cast<int*>(memoryBuffer+sizeof(BlockHeader));
......@@ -91,25 +94,17 @@ public:
// Redirect pointer to data
blockHeader->attributeOffset = (sizeof(AttributeClass) * blockHeader->nbParticlesAllocatedInGroup);
unsigned char* previousPointer = reinterpret_cast<unsigned char*>(particlePosition[2] + blockHeader->nbParticlesAllocatedInGroup);
for(unsigned idxAttribute = 0 ; idxAttribute < NbAttributesPerParticle ; ++idxAttribute){
particleAttributes[idxAttribute] = reinterpret_cast<AttributeClass*>(previousPointer);
previousPointer += sizeof(AttributeClass)*blockHeader->nbParticlesAllocatedInGroup;
AttributeClass* symAttributes = (AttributeClass*)(&particlePosition[2][blockHeader->nbParticlesAllocatedInGroup]);
for(unsigned idxAttribute = 0 ; idxAttribute < NbSymbAttributes ; ++idxAttribute){
particleAttributes[idxAttribute] = symAttributes;
symAttributes += blockHeader->nbParticlesAllocatedInGroup;
}
if(inAttributes){
attributesBuffer = (AttributeClass*)inAttributes;
for(unsigned idxAttribute = 0 ; idxAttribute < NbAttributesPerParticle ; ++idxAttribute){
particleAttributes[idxAttribute+NbSymbAttributes] = &attributesBuffer[idxAttribute*blockHeader->nbParticlesAllocatedInGroup];
}
}
}
/** Call the destructor of leaves and dealloc block memory */
__device__ ~FCudaGroupOfParticles(){
}
/** Give access to the buffer to send the data */
__device__ const unsigned char* getRawBuffer() const{
return memoryBuffer;
}
/** The the size of the allocated buffer */
__device__ int getBufferSizeInByte() const {
return allocatedMemoryInByte;
}
/** The index of the fist leaf (set from the constructor) */
......
......@@ -33,12 +33,13 @@
#include "FStarPUUtils.hpp"
template <class KernelClass, class CellClass, class CudaCellGroupClass,
class CudaParticleGroupClass, class CudaParticleContainerClass,
template <class KernelClass, class SymboleCellClass, class PoleCellClass, class LocalCellClass,
class CudaCellGroupClass, class CudaParticleGroupClass, class CudaParticleContainerClass,
class CudaKernelClass>
class FStarPUCudaWrapper {
protected:
typedef FStarPUCudaWrapper<KernelClass, CellClass, CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass> ThisClass;
typedef FStarPUCudaWrapper<KernelClass, SymboleCellClass, PoleCellClass, LocalCellClass,
CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass> ThisClass;
template <class OtherBlockClass>
struct BlockInteractions{
......@@ -72,20 +73,19 @@ public:
}
static void bottomPassCallback(void *buffers[], void *cl_arg){
//CudaCellGroupClass leafCells((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
// STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]));
//CudaParticleGroupClass containers((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]),
// STARPU_VARIABLE_GET_ELEMSIZE(buffers[1]));
FStarPUPtrInterface* worker = nullptr;
starpu_codelet_unpack_args(cl_arg, &worker);
CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CPU_IDX)->kernels[starpu_worker_get_id()];
FCuda__bottomPassCallback<CellClass,CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[1]),
kernel, starpu_cuda_get_local_stream());
FCuda__bottomPassCallback< SymboleCellClass, PoleCellClass, LocalCellClass,
CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>(
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]),
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[2]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[2]),
kernel, starpu_cuda_get_local_stream());
}
/////////////////////////////////////////////////////////////////////////////////////
......@@ -93,9 +93,6 @@ public:
/////////////////////////////////////////////////////////////////////////////////////
static void upwardPassCallback(void *buffers[], void *cl_arg){
//CudaCellGroupClass currentCells((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
// STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]));
FStarPUPtrInterface* worker = nullptr;
int nbSubCellGroups = 0;
int idxLevel = 0;
......@@ -105,16 +102,22 @@ public:
memset(&subCellGroupsPtr, 0, sizeof(subCellGroupsPtr));
FCudaParams<std::size_t,9> subCellGroupsSize;
memset(&subCellGroupsPtr, 0, sizeof(subCellGroupsSize));
FCudaParams<unsigned char*,9> subCellGroupsUpPtr;
memset(&subCellGroupsUpPtr, 0, sizeof(subCellGroupsUpPtr));
for(int idxSubGroup = 0; idxSubGroup < nbSubCellGroups ; ++idxSubGroup){
subCellGroupsPtr.values[idxSubGroup] = ((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[idxSubGroup+1]));
subCellGroupsSize.values[idxSubGroup] = STARPU_VARIABLE_GET_ELEMSIZE(buffers[idxSubGroup+1]);
subCellGroupsPtr.values[idxSubGroup] = ((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[(idxSubGroup*2)+2]));
subCellGroupsSize.values[idxSubGroup] = STARPU_VARIABLE_GET_ELEMSIZE(buffers[(idxSubGroup*2)+2]);
subCellGroupsUpPtr.values[idxSubGroup] = (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[(idxSubGroup*2)+3]);
}
CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CPU_IDX)->kernels[starpu_worker_get_id()];
FCuda__upwardPassCallback<CellClass,CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
FCuda__upwardPassCallback< SymboleCellClass, PoleCellClass, LocalCellClass,
CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>(
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
subCellGroupsPtr,subCellGroupsSize,
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]),
subCellGroupsPtr,subCellGroupsSize,subCellGroupsUpPtr,
nbSubCellGroups, idxLevel, kernel, starpu_cuda_get_local_stream());
}
......@@ -123,11 +126,6 @@ public:
/////////////////////////////////////////////////////////////////////////////////////
#ifdef STARPU_USE_MPI
static void transferInoutPassCallbackMpi(void *buffers[], void *cl_arg){
// CudaCellGroupClass currentCells((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
// STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]));
// CudaCellGroupClass externalCells((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]),
// STARPU_VARIABLE_GET_ELEMSIZE(buffers[1]));
FStarPUPtrInterface* worker = nullptr;
int idxLevel = 0;
const std::vector<OutOfBlockInteraction>* outsideInteractions;
......@@ -135,10 +133,14 @@ public:
CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CPU_IDX)->kernels[starpu_worker_get_id()];
FCuda__transferInoutPassCallbackMpi<CellClass,CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
FCuda__transferInoutPassCallbackMpi< SymboleCellClass, PoleCellClass, LocalCellClass,
CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass