Attention une mise à jour du serveur va être effectuée le vendredi 16 avril entre 12h et 12h30. Cette mise à jour va générer une interruption du service de quelques minutes.

Commit c6ac1155 authored by BRAMAS Berenger's avatar BRAMAS Berenger

make cuda compile but cannot test on my laptop

parent 802eb4f4
......@@ -30,7 +30,7 @@
#include "../Cuda/FCudaGroupAttachedLeaf.hpp"
#include "../Cuda/FCudaGroupOfParticles.hpp"
#include "../Cuda/FCudaGroupOfCells.hpp"
#include "../Cuda/FCudaEmptyCell.hpp"
#include "../Cuda/FCudaEmptyCellSymb.hpp"
#endif
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
#include "../StarPUUtils/FStarPUOpenClWrapper.hpp"
......@@ -40,7 +40,8 @@
template <class OctreeClass, class CellContainerClass, class KernelClass, class ParticleGroupClass, class StarPUCpuWrapperClass
#ifdef ScalFMM_ENABLE_CUDA_KERNEL
, class StarPUCudaWrapperClass = FStarPUCudaWrapper<KernelClass, FCudaEmptyCell, FCudaGroupOfCells<FCudaEmptyCell>, FCudaGroupOfParticles<0, int>, FCudaGroupAttachedLeaf<0, int>, FCudaEmptyKernel<>>
, class StarPUCudaWrapperClass = FStarPUCudaWrapper<KernelClass, FCudaEmptyCellSymb, int, int, FCudaGroupOfCells<FCudaEmptyCellSymb, int, int>,
FCudaGroupOfParticles<0, 0, int>, FCudaGroupAttachedLeaf<0, 0, int>, FCudaEmptyKernel>
#endif
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
, class StarPUOpenClWrapperClass = FStarPUOpenClWrapper<KernelClass, FOpenCLDeviceWrapper<KernelClass>>
......
//@SCALFMM_PRIVATE
#ifndef FCUDACOMPLETECELL_HPP
#define FCUDACOMPLETECELL_HPP
template <class SymboleCellClass, class PoleCellClass, class LocalCellClass>
struct alignas(FStarPUDefaultAlign::StructAlign) FCudaCompositeCell {
__device__ FCudaCompositeCell()
: symb(nullptr), up(nullptr), down(nullptr){
}
SymboleCellClass* symb;
PoleCellClass* up;
LocalCellClass* down;
};
#endif // FCUDACOMPLETECELL_HPP
This diff is collapsed.
......@@ -7,56 +7,80 @@
#include "../Core/FOutOfBlockInteraction.hpp"
#include "FCudaStructParams.hpp"
template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__bottomPassCallback(unsigned char* leafCellsPtr, std::size_t leafCellsSize,
template <class SymboleCellClass, class PoleCellClass, class LocalCellClass,
class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__bottomPassCallback(unsigned char* leafCellsPtr, std::size_t leafCellsSize, unsigned char* leafCellsUpPtr,
unsigned char* containersPtr, std::size_t containersSize,
CudaKernelClass* kernel, cudaStream_t currentStream);
template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__upwardPassCallback(unsigned char* currentCellsPtr, std::size_t currentCellsSize,
template <class SymboleCellClass, class PoleCellClass, class LocalCellClass,
class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__upwardPassCallback(
unsigned char* currentCellsPtr, std::size_t currentCellsSize, unsigned char* currentCellsUpPtr,
FCudaParams<unsigned char*,9> subCellGroupsPtr, FCudaParams<std::size_t, 9> subCellGroupsSize,
FCudaParams<unsigned char*,9> subCellGroupsUpPtr,
int nbSubCellGroups, int idxLevel, CudaKernelClass* kernel, cudaStream_t currentStream);
template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__transferInoutPassCallbackMpi(unsigned char* currentCellsPtr, std::size_t currentCellsSize,
unsigned char* externalCellsPtr, std::size_t externalCellsSize,
template <class SymboleCellClass, class PoleCellClass, class LocalCellClass,
class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__transferInoutPassCallbackMpi(
unsigned char* currentCellsPtr, std::size_t currentCellsSize, unsigned char* currentCellsDownPtr,
unsigned char* externalCellsPtr, std::size_t externalCellsSize, unsigned char* externalCellsUpPtr,
int idxLevel, const OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions, CudaKernelClass* kernel, cudaStream_t currentStream);
template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__transferInPassCallback(unsigned char* currentCellsPtr, std::size_t currentCellsSize,
template <class SymboleCellClass, class PoleCellClass, class LocalCellClass,
class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__transferInPassCallback(
unsigned char* currentCellsPtr, std::size_t currentCellsSize,
unsigned char* currentCellsUpPtr, unsigned char* currentCellsDownPtr,
int idxLevel, CudaKernelClass* kernel, cudaStream_t currentStream);
template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__transferInoutPassCallback(unsigned char* currentCellsPtr, std::size_t currentCellsSize,
template <class SymboleCellClass, class PoleCellClass, class LocalCellClass,
class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__transferInoutPassCallback(
unsigned char* currentCellsPtr, std::size_t currentCellsSize,
unsigned char* currentCellsUpPtr, unsigned char* currentCellsDownPtr,
unsigned char* externalCellsPtr, std::size_t externalCellsSize,
unsigned char* externalCellsUpPtr, unsigned char* externalCellsDownPtr,
int idxLevel, const OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions, CudaKernelClass* kernel, cudaStream_t currentStream);
template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__downardPassCallback(unsigned char* currentCellsPtr, std::size_t currentCellsSize,
template <class SymboleCellClass, class PoleCellClass, class LocalCellClass,
class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__downardPassCallback(
unsigned char* currentCellsPtr, std::size_t currentCellsSize, unsigned char* currentCellsDownPtr,
FCudaParams<unsigned char*,9> subCellGroupsPtr, FCudaParams<std::size_t,9> subCellGroupsSize,
FCudaParams<unsigned char*,9> subCellGroupsDownPtr,
int nbSubCellGroups, int idxLevel, CudaKernelClass* kernel, cudaStream_t currentStream);
template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__directInoutPassCallbackMpi(unsigned char* containersPtr, std::size_t containersSize,
template <class SymboleCellClass, class PoleCellClass, class LocalCellClass,
class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__directInoutPassCallbackMpi(
unsigned char* containersPtr, std::size_t containersSize, unsigned char* containersDownPtr,
unsigned char* externalContainersPtr, std::size_t externalContainersSize,
const OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions, const int treeHeight, CudaKernelClass* kernel, cudaStream_t currentStream);
template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__directInPassCallback(unsigned char* containersPtr, std::size_t containersSize,
template <class SymboleCellClass, class PoleCellClass, class LocalCellClass,
class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__directInPassCallback(
unsigned char* containersPtr, std::size_t containersSize, unsigned char* containersDownPtr,
const int treeHeight, CudaKernelClass* kernel, cudaStream_t currentStream);
template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__directInoutPassCallback(unsigned char* containersPtr, std::size_t containersSize,
unsigned char* externalContainersPtr, std::size_t externalContainersSize,
template <class SymboleCellClass, class PoleCellClass, class LocalCellClass,
class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__directInoutPassCallback(
unsigned char* containersPtr, std::size_t containersSize, unsigned char* containersDownPtr,
unsigned char* externalContainersPtr, std::size_t externalContainersSize, unsigned char* externalContainersDownPtr,
const OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions, const int treeHeight, CudaKernelClass* kernel, cudaStream_t currentStream);
template <class CellClass, class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__mergePassCallback(unsigned char* leafCellsPtr, std::size_t leafCellsSize,
unsigned char* containersPtr, std::size_t containersSize,
template <class SymboleCellClass, class PoleCellClass, class LocalCellClass,
class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__mergePassCallback(
unsigned char* leafCellsPtr, std::size_t leafCellsSize, unsigned char* leafCellsDownPtr,
unsigned char* containersPtr, std::size_t containersSize, unsigned char* containersDownPtr,
CudaKernelClass* kernel, cudaStream_t currentStream);
template <class CudaKernelClass>
......
#ifndef FCUDAEMPTYCELL_HPP
#define FCUDAEMPTYCELL_HPP
#ifndef FCUDAEMPTYCELLSYMB_HPP
#define FCUDAEMPTYCELLSYMB_HPP
#include "../../Utils/FGlobal.hpp"
#include "../../Containers/FTreeCoordinate.hpp"
#include "../StarPUUtils/FStarPUDefaultAlign.hpp"
struct alignas(FStarPUDefaultAlign::StructAlign) FCudaEmptyCell {
struct alignas(FStarPUDefaultAlign::StructAlign) FCudaEmptyCellSymb {
MortonIndex mortonIndex;
int coordinates[3];
};
#endif // FCUDAEMPTYCELL_HPP
#endif // FCUDAEMPTYCELLSYMB_HPP
......@@ -4,29 +4,32 @@
#include "FCudaGlobal.hpp"
#include "FCudaGroupAttachedLeaf.hpp"
#include "FCudaEmptyCell.hpp"
#include "FCudaEmptyCellSymb.hpp"
#include "FCudaCompositeCell.hpp"
/**
* This class defines what should be a Cuda kernel.
*/
template <class CellClass = FCudaEmptyCell, class ContainerClass = FCudaGroupAttachedLeaf<0, int>>
class FCudaEmptyKernel {
protected:
public:
__device__ void P2M(CellClass* /*pole*/, const ContainerClass* const /*particles*/) {
typedef FCudaGroupAttachedLeaf<0,0,int> ContainerClass;
typedef FCudaCompositeCell<FCudaEmptyCellSymb,int,int> CellClass;
__device__ void P2M(CellClass /*pole*/, const ContainerClass* const /*particles*/) {
}
__device__ void M2M(CellClass* /*pole*/, const CellClass* /*child*/[8], const int /*level*/) {
__device__ void M2M(CellClass /*pole*/, const CellClass /*child*/[8], const int /*level*/) {
}
__device__ void M2L(CellClass* /*pole*/, const CellClass* /*distantNeighbors*/[343],
__device__ void M2L(CellClass /*pole*/, const CellClass /*distantNeighbors*/[343],
const int /*size*/, const int /*level*/) {
}
__device__ void L2L(const CellClass* /*local*/, CellClass* /*child*/[8], const int /*level*/) {
__device__ void L2L(const CellClass /*local*/, CellClass /*child*/[8], const int /*level*/) {
}
__device__ void L2P(const CellClass* /*local*/, ContainerClass*const /*particles*/){
__device__ void L2P(const CellClass /*local*/, ContainerClass*const /*particles*/){
}
__device__ void P2P(const int3& ,
......
......@@ -4,7 +4,7 @@
#include "FCudaGlobal.hpp"
template <unsigned NbAttributesPerParticle, class AttributeClass = FReal>
template <unsigned NbSymbAttributes, unsigned NbAttributesPerParticle, class AttributeClass = FReal>
class FCudaGroupAttachedLeaf {
protected:
//< Nb of particles in the current leaf
......@@ -12,13 +12,13 @@ protected:
//< Pointers to the positions of the particles
FReal* positionsPointers[3];
//< Pointers to the attributes of the particles
AttributeClass* attributes[NbAttributesPerParticle];
AttributeClass* attributes[NbSymbAttributes+NbAttributesPerParticle];
public:
/** Empty constructor to point to nothing */
__device__ FCudaGroupAttachedLeaf() : nbParticles(-1) {
memset(positionsPointers, 0, sizeof(FReal*) * 3);
memset(attributes, 0, sizeof(AttributeClass*) * NbAttributesPerParticle);
memset(attributes, 0, sizeof(AttributeClass*) * (NbSymbAttributes+NbAttributesPerParticle));
}
/**
......@@ -37,9 +37,19 @@ public:
positionsPointers[1] = reinterpret_cast<FReal*>(reinterpret_cast<unsigned char*>(inPositionBuffer) + inLeadingPosition);
positionsPointers[2] = reinterpret_cast<FReal*>(reinterpret_cast<unsigned char*>(inPositionBuffer) + inLeadingPosition*2);
unsigned char* symAttributes = reinterpret_cast<unsigned char*>(reinterpret_cast<unsigned char*>(inPositionBuffer) + inLeadingPosition*3);
for(unsigned idxAttribute = 0 ; idxAttribute < NbSymbAttributes ; ++idxAttribute){
attributes[idxAttribute] = reinterpret_cast<AttributeClass*>(symAttributes + idxAttribute*inLeadingAttributes);
}
// Redirect pointers to data
for(unsigned idxAttribute = 0 ; idxAttribute < NbAttributesPerParticle ; ++idxAttribute){
attributes[idxAttribute] = reinterpret_cast<AttributeClass*>(reinterpret_cast<unsigned char*>(inAttributesBuffer) + idxAttribute*inLeadingAttributes);
if(inAttributesBuffer){
for(unsigned idxAttribute = 0 ; idxAttribute < NbAttributesPerParticle ; ++idxAttribute){
attributes[idxAttribute+NbSymbAttributes] = reinterpret_cast<AttributeClass*>(reinterpret_cast<unsigned char*>(inAttributesBuffer) + idxAttribute*inLeadingAttributes);
}
}
else{
memset(&attributes[NbSymbAttributes], 0, sizeof(AttributeClass*)*NbAttributesPerParticle);
}
}
......
......@@ -3,13 +3,13 @@
#define FCUDAGROUPOFCELLS_HPP
#include "FCudaGlobal.hpp"
#include "FCudaCompositeCell.hpp"
#include "../StarPUUtils/FStarPUDefaultAlign.hpp"
/**
* @brief The FCudaGroupOfCells class manages the cells in block allocation.
*/
template <class CellClass>
template <class SymboleCellClass, class PoleCellClass, class LocalCellClass>
class FCudaGroupOfCells {
/** One header is allocated at the beginning of each block */
struct alignas(FStarPUDefaultAlign::StructAlign) BlockHeader{
......@@ -30,23 +30,34 @@ protected:
//< Pointer to the indexes table inside the block memory
int* blockIndexesTable;
//< Pointer to the cells inside the block memory
unsigned char* blockCells;
SymboleCellClass* blockCells;
//< This value is for not used cells
static const MortonIndex CellIsEmptyFlag = -1;
//< The multipole data
PoleCellClass* cellMultipoles;
//< The local data
LocalCellClass* cellLocals;
public:
typedef FCudaCompositeCell<SymboleCellClass, PoleCellClass, LocalCellClass> CompleteCellClass;
__device__ FCudaGroupOfCells()
: allocatedMemoryInByte(0), memoryBuffer(nullptr),
blockHeader(nullptr), blockIndexesTable(nullptr), blockCells(nullptr){
blockHeader(nullptr), blockIndexesTable(nullptr), blockCells(nullptr),
cellMultipoles(nullptr), cellLocals(nullptr){
}
__device__ void reset(unsigned char* inBuffer, const size_t inAllocatedMemoryInByte){
__device__ void reset(unsigned char* inBuffer, const size_t inAllocatedMemoryInByte,
unsigned char* inCellMultipoles, unsigned char* inCellLocals){
// Move the pointers to the correct position
allocatedMemoryInByte = (inAllocatedMemoryInByte);
memoryBuffer = (inBuffer);
blockHeader = reinterpret_cast<BlockHeader*>(memoryBuffer);
blockIndexesTable = reinterpret_cast<int*>(memoryBuffer+sizeof(BlockHeader));
blockCells = reinterpret_cast<unsigned char*>(memoryBuffer+sizeof(BlockHeader)+(blockHeader->blockIndexesTableSize*sizeof(int)));
blockCells = reinterpret_cast<SymboleCellClass*>(memoryBuffer+sizeof(BlockHeader)+(blockHeader->blockIndexesTableSize*sizeof(int)));
cellMultipoles = (PoleCellClass*)inCellMultipoles;
cellLocals = (LocalCellClass*)inCellLocals;
}
/**
......@@ -54,27 +65,17 @@ public:
* @param inBuffer
* @param inAllocatedMemoryInByte
*/
__device__ FCudaGroupOfCells(unsigned char* inBuffer, const size_t inAllocatedMemoryInByte)
__device__ FCudaGroupOfCells(unsigned char* inBuffer, const size_t inAllocatedMemoryInByte,
unsigned char* inCellMultipoles, unsigned char* inCellLocals)
: allocatedMemoryInByte(inAllocatedMemoryInByte), memoryBuffer(inBuffer),
blockHeader(nullptr), blockIndexesTable(nullptr), blockCells(nullptr){
blockHeader(nullptr), blockIndexesTable(nullptr), blockCells(nullptr),
cellMultipoles(nullptr), cellLocals(nullptr){
// Move the pointers to the correct position
blockHeader = reinterpret_cast<BlockHeader*>(memoryBuffer);
blockIndexesTable = reinterpret_cast<int*>(memoryBuffer+sizeof(BlockHeader));
blockCells = reinterpret_cast<unsigned char*>(memoryBuffer+sizeof(BlockHeader)+(blockHeader->blockIndexesTableSize*sizeof(int)));
}
/** Call the destructor of cells and dealloc block memory */
__device__ ~FCudaGroupOfCells(){
}
/** Give access to the buffer to send the data */
__device__ const unsigned char* getRawBuffer() const{
return memoryBuffer;
}
/** The the size of the allocated buffer */
__device__ int getBufferSizeInByte() const {
return allocatedMemoryInByte;
blockCells = reinterpret_cast<SymboleCellClass*>(memoryBuffer+sizeof(BlockHeader)+(blockHeader->blockIndexesTableSize*sizeof(int)));
cellMultipoles = (PoleCellClass*)inCellMultipoles;
cellLocals = (LocalCellClass*)inCellLocals;
}
/** The index of the fist cell (set from the constructor) */
......@@ -108,16 +109,65 @@ public:
}
/** Return the address of the cell if it exists (or NULL) */
__device__ CellClass* getCell(const MortonIndex inIndex){
if( exists(inIndex) ) return (CellClass*)(&blockCells[sizeof(CellClass)*blockIndexesTable[inIndex-blockHeader->startingIndex]]);
else return nullptr;
__device__ CompleteCellClass getCompleteCell(const MortonIndex inIndex){
//FAssertLF(cellMultipoles && cellLocals);
if( exists(inIndex) ){
CompleteCellClass cell;
const int cellPos = blockIndexesTable[inIndex-blockHeader->startingIndex];
cell.symb = &blockCells[cellPos];
cell.up = &cellMultipoles[cellPos];
cell.down = &cellLocals[cellPos];
return cell;
}
else{
CompleteCellClass cell;
cell.symb = nullptr;
cell.up = nullptr;
cell.down = nullptr;
return cell;
}
}
/** Return the address of the cell if it exists (or NULL) */
__device__ CompleteCellClass getUpCell(const MortonIndex inIndex){
//FAssertLF(cellMultipoles);
if( exists(inIndex) ){
CompleteCellClass cell;
const int cellPos = blockIndexesTable[inIndex-blockHeader->startingIndex];
cell.symb = &blockCells[cellPos];
cell.up = &cellMultipoles[cellPos];
cell.down = nullptr;
return cell;
}
else{
CompleteCellClass cell;
cell.symb = nullptr;
cell.up = nullptr;
cell.down = nullptr;
return cell;
}
}
/** Return the address of the cell if it exists (or NULL) */
__device__ const CellClass* getCell(const MortonIndex inIndex) const {
if( exists(inIndex) ) return (CellClass*)(&blockCells[sizeof(CellClass)*blockIndexesTable[inIndex-blockHeader->startingIndex]]);
else return nullptr;
__device__ CompleteCellClass getDownCell(const MortonIndex inIndex){
//FAssertLF(cellLocals);
if( exists(inIndex) ){
CompleteCellClass cell;
const int cellPos = blockIndexesTable[inIndex-blockHeader->startingIndex];
cell.symb = &blockCells[cellPos];
cell.up = nullptr;
cell.down = &cellLocals[cellPos];
return cell;
}
else{
CompleteCellClass cell;
cell.symb = nullptr;
cell.up = nullptr;
cell.down = nullptr;
return cell;
}
}
};
#endif // FCUDAGROUPOFCELLS_HPP
......
......@@ -5,7 +5,7 @@
#include "FCudaGlobal.hpp"
#include "../StarPUUtils/FStarPUDefaultAlign.hpp"
template <unsigned NbAttributesPerParticle, class AttributeClass = FReal>
template <unsigned NbSymbAttributes, unsigned NbAttributesPerParticle, class AttributeClass = FReal>
class FCudaGroupOfParticles {
/** One header is allocated at the beginning of each block */
struct alignas(FStarPUDefaultAlign::StructAlign) BlockHeader{
......@@ -66,7 +66,8 @@ protected:
FReal* particlePosition[3];
//< Pointers to the particles data inside the block memory
AttributeClass* particleAttributes[NbAttributesPerParticle];
AttributeClass* attributesBuffer;
AttributeClass* particleAttributes[NbSymbAttributes+NbAttributesPerParticle];
public:
/**
......@@ -74,9 +75,11 @@ public:
* @param inBuffer
* @param inAllocatedMemoryInByte
*/
__device__ FCudaGroupOfParticles(unsigned char* inBuffer, const size_t inAllocatedMemoryInByte)
__device__ FCudaGroupOfParticles(unsigned char* inBuffer, const size_t inAllocatedMemoryInByte,
unsigned char* inAttributes)
: allocatedMemoryInByte(inAllocatedMemoryInByte), memoryBuffer(inBuffer),
blockHeader(nullptr), blockIndexesTable(nullptr), leafHeader(nullptr), nbParticlesInGroup(0){
blockHeader(nullptr), blockIndexesTable(nullptr), leafHeader(nullptr), nbParticlesInGroup(0),
attributesBuffer(nullptr){
// Move the pointers to the correct position
blockHeader = reinterpret_cast<BlockHeader*>(memoryBuffer);
blockIndexesTable = reinterpret_cast<int*>(memoryBuffer+sizeof(BlockHeader));
......@@ -91,25 +94,17 @@ public:
// Redirect pointer to data
blockHeader->attributeOffset = (sizeof(AttributeClass) * blockHeader->nbParticlesAllocatedInGroup);
unsigned char* previousPointer = reinterpret_cast<unsigned char*>(particlePosition[2] + blockHeader->nbParticlesAllocatedInGroup);
for(unsigned idxAttribute = 0 ; idxAttribute < NbAttributesPerParticle ; ++idxAttribute){
particleAttributes[idxAttribute] = reinterpret_cast<AttributeClass*>(previousPointer);
previousPointer += sizeof(AttributeClass)*blockHeader->nbParticlesAllocatedInGroup;
AttributeClass* symAttributes = (AttributeClass*)(&particlePosition[2][blockHeader->nbParticlesAllocatedInGroup]);
for(unsigned idxAttribute = 0 ; idxAttribute < NbSymbAttributes ; ++idxAttribute){
particleAttributes[idxAttribute] = symAttributes;
symAttributes += blockHeader->nbParticlesAllocatedInGroup;
}
if(inAttributes){
attributesBuffer = (AttributeClass*)inAttributes;
for(unsigned idxAttribute = 0 ; idxAttribute < NbAttributesPerParticle ; ++idxAttribute){
particleAttributes[idxAttribute+NbSymbAttributes] = &attributesBuffer[idxAttribute*blockHeader->nbParticlesAllocatedInGroup];
}
}
}
/** Call the destructor of leaves and dealloc block memory */
__device__ ~FCudaGroupOfParticles(){
}
/** Give access to the buffer to send the data */
__device__ const unsigned char* getRawBuffer() const{
return memoryBuffer;
}
/** The the size of the allocated buffer */
__device__ int getBufferSizeInByte() const {
return allocatedMemoryInByte;
}
/** The index of the fist leaf (set from the constructor) */
......
......@@ -33,12 +33,13 @@
#include "FStarPUUtils.hpp"
template <class KernelClass, class CellClass, class CudaCellGroupClass,
class CudaParticleGroupClass, class CudaParticleContainerClass,
template <class KernelClass, class SymboleCellClass, class PoleCellClass, class LocalCellClass,
class CudaCellGroupClass, class CudaParticleGroupClass, class CudaParticleContainerClass,
class CudaKernelClass>
class FStarPUCudaWrapper {
protected:
typedef FStarPUCudaWrapper<KernelClass, CellClass, CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass> ThisClass;
typedef FStarPUCudaWrapper<KernelClass, SymboleCellClass, PoleCellClass, LocalCellClass,
CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass> ThisClass;
template <class OtherBlockClass>
struct BlockInteractions{
......@@ -72,20 +73,19 @@ public:
}
static void bottomPassCallback(void *buffers[], void *cl_arg){
//CudaCellGroupClass leafCells((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
// STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]));
//CudaParticleGroupClass containers((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]),
// STARPU_VARIABLE_GET_ELEMSIZE(buffers[1]));
FStarPUPtrInterface* worker = nullptr;
starpu_codelet_unpack_args(cl_arg, &worker);
CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CPU_IDX)->kernels[starpu_worker_get_id()];
FCuda__bottomPassCallback<CellClass,CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[1]),
kernel, starpu_cuda_get_local_stream());
FCuda__bottomPassCallback< SymboleCellClass, PoleCellClass, LocalCellClass,
CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>(
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]),
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[2]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[2]),
kernel, starpu_cuda_get_local_stream());
}
/////////////////////////////////////////////////////////////////////////////////////
......@@ -93,9 +93,6 @@ public:
/////////////////////////////////////////////////////////////////////////////////////
static void upwardPassCallback(void *buffers[], void *cl_arg){
//CudaCellGroupClass currentCells((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
// STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]));
FStarPUPtrInterface* worker = nullptr;
int nbSubCellGroups = 0;
int idxLevel = 0;
......@@ -105,16 +102,22 @@ public:
memset(&subCellGroupsPtr, 0, sizeof(subCellGroupsPtr));
FCudaParams<std::size_t,9> subCellGroupsSize;
memset(&subCellGroupsPtr, 0, sizeof(subCellGroupsSize));
FCudaParams<unsigned char*,9> subCellGroupsUpPtr;
memset(&subCellGroupsUpPtr, 0, sizeof(subCellGroupsUpPtr));
for(int idxSubGroup = 0; idxSubGroup < nbSubCellGroups ; ++idxSubGroup){
subCellGroupsPtr.values[idxSubGroup] = ((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[idxSubGroup+1]));
subCellGroupsSize.values[idxSubGroup] = STARPU_VARIABLE_GET_ELEMSIZE(buffers[idxSubGroup+1]);
subCellGroupsPtr.values[idxSubGroup] = ((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[(idxSubGroup*2)+2]));
subCellGroupsSize.values[idxSubGroup] = STARPU_VARIABLE_GET_ELEMSIZE(buffers[(idxSubGroup*2)+2]);
subCellGroupsUpPtr.values[idxSubGroup] = (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[(idxSubGroup*2)+3]);
}
CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CPU_IDX)->kernels[starpu_worker_get_id()];
FCuda__upwardPassCallback<CellClass,CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
FCuda__upwardPassCallback< SymboleCellClass, PoleCellClass, LocalCellClass,
CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>(
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]),
subCellGroupsPtr,subCellGroupsSize,
(unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]),
subCellGroupsPtr,subCellGroupsSize,subCellGroupsUpPtr,
nbSubCellGroups, idxLevel, kernel, starpu_cuda_get_local_stream());
}
......@@ -123,11 +126,6 @@ public:
/////////////////////////////////////////////////////////////////////////////////////
#ifdef STARPU_USE_MPI
static void transferInoutPassCallbackMpi(void *buffers[], void *cl_arg){
// CudaCellGroupClass currentCells((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
// STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]));
// CudaCellGroupClass externalCells((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]),
// STARPU_VARIABLE_GET_ELEMSIZE(buffers[1]));
FStarPUPtrInterface* worker = nullptr;
int idxLevel = 0;
const std::vector<OutOfBlockInteraction>* outsideInteractions;
......@@ -135,10 +133,14 @@ public:
CudaKernelClass* kernel = worker->get<ThisClass>(FSTARPU_CPU_IDX)->kernels[starpu_worker_get_id()];
FCuda__transferInoutPassCallbackMpi<CellClass,CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]),
FCuda__transferInoutPassCallbackMpi< SymboleCellClass, PoleCellClass, LocalCellClass,
CudaCellGroupClass, CudaParticleGroupClass, CudaParticleContainerClass, CudaKernelClass>(