Commit 533f06c2 authored by BRAMAS Berenger

Use global particle container alignment

parent b775a049
......@@ -19,6 +19,7 @@
#include "FAbstractParticleContainer.hpp"
#include "FAbstractSerializable.hpp"
#include "Utils/FGlobal.hpp"
#include "Utils/FAlignedMemory.hpp"
#include "Utils/FMath.hpp"
#include "Utils/FPoint.hpp"
......@@ -47,6 +48,9 @@
template <unsigned NbAttributesPerParticle, class AttributeClass = FReal >
class FBasicParticleContainer : public FAbstractParticleContainer, public FAbstractSerializable {
protected:
static const int MemoryAlignement = FP2PDefaultAlignement;
static const int DefaultNbParticles = int(MemoryAlignement/sizeof(FReal));
/** The number of particles in the container */
int nbParticles;
/** 3 pointers to 3 arrays of real to store the position */
......@@ -105,11 +109,11 @@ protected:
void increaseSizeIfNeeded(FSize sizeInput = 1){
if( nbParticles+(sizeInput-1) >= allocatedParticles ){
// allocate memory
const int moduloParticlesNumber = (32/sizeof(FReal)); // We want to be rounded to 32B
allocatedParticles = (FMath::Max(10,int(FReal(nbParticles+sizeInput)*1.5)) + moduloParticlesNumber - 1) & ~(moduloParticlesNumber-1);
const int moduloParticlesNumber = (MemoryAlignement/sizeof(FReal));
allocatedParticles = (FMath::Max(DefaultNbParticles,int(FReal(nbParticles+sizeInput)*1.5)) + moduloParticlesNumber - 1) & ~(moduloParticlesNumber-1);
// init with 0
const size_t allocatedBytes = (sizeof(FReal)*3 + sizeof(AttributeClass)*NbAttributesPerParticle)*allocatedParticles;
FReal* newData = reinterpret_cast<FReal*>(FAlignedMemory::AllocateBytes<32>(allocatedBytes));
FReal* newData = reinterpret_cast<FReal*>(FAlignedMemory::AllocateBytes<MemoryAlignement>(allocatedBytes));
memset( newData, 0, allocatedBytes);
// copy memory
const char*const toDelete = reinterpret_cast<const char*>(positions[0]);
......@@ -387,11 +391,11 @@ public:
buffer >> nbParticles;
if( nbParticles >= allocatedParticles ){
// allocate memory
const int moduloParticlesNumber = (32/sizeof(FReal)); // We want to be rounded to 32B
allocatedParticles = (FMath::Max(10,int(FReal(nbParticles+1)*1.5)) + moduloParticlesNumber - 1) & ~(moduloParticlesNumber-1);
const int moduloParticlesNumber = (MemoryAlignement/sizeof(FReal));
allocatedParticles = (nbParticles + moduloParticlesNumber - 1) & ~(moduloParticlesNumber-1);
// init with 0
const size_t allocatedBytes = (sizeof(FReal)*3 + sizeof(AttributeClass)*NbAttributesPerParticle)*allocatedParticles;
FReal* newData = reinterpret_cast<FReal*>(FAlignedMemory::AllocateBytes<32>(allocatedBytes));
FReal* newData = reinterpret_cast<FReal*>(FAlignedMemory::AllocateBytes<MemoryAlignement>(allocatedBytes));
memset( newData, 0, allocatedBytes);
FAlignedMemory::DeallocBytes(positions[0]);
......
......@@ -67,7 +67,7 @@ public:
(&cellLocals[blockIndexesTable[idxCellPtr]])->~LocalCellClass();
}
}
FAlignedMemory::DeallocByte(memoryBuffer);
FAlignedMemory::DeallocBytes(memoryBuffer);
FAlignedMemory::DeallocBytes(cellMultipoles);
FAlignedMemory::DeallocBytes(cellLocals);
}
......@@ -118,7 +118,7 @@ public:
// Allocate
FAssertLF(0 <= int(memoryToAlloc) && int(memoryToAlloc) < std::numeric_limits<int>::max());
allocatedMemoryInByte = memoryToAlloc;
memoryBuffer = (unsigned char*)FAlignedMemory::AllocateByte<32>(memoryToAlloc);
memoryBuffer = (unsigned char*)FAlignedMemory::AllocateBytes<32>(memoryToAlloc);
FAssertLF(memoryBuffer);
memset(memoryBuffer, 0, memoryToAlloc);
......@@ -133,8 +133,8 @@ public:
blockHeader->numberOfCellsInBlock = inNumberOfCells;
blockHeader->blockIndexesTableSize = blockIndexesTableSize;
cellMultipoles = (PoleCellClass*)FAlignedMemory::AllocateByte<32>(inNumberOfCells*sizeof(PoleCellClass));
cellLocals = (LocalCellClass*)FAlignedMemory::AllocateByte<32>(inNumberOfCells*sizeof(LocalCellClass));
cellMultipoles = (PoleCellClass*)FAlignedMemory::AllocateBytes<32>(inNumberOfCells*sizeof(PoleCellClass));
cellLocals = (LocalCellClass*)FAlignedMemory::AllocateBytes<32>(inNumberOfCells*sizeof(LocalCellClass));
for(int idxCell = 0 ; idxCell < inNumberOfCells ; ++idxCell){
new (&cellMultipoles[idxCell]) PoleCellClass();
new (&cellLocals[idxCell]) LocalCellClass();
......
......@@ -5,6 +5,7 @@
#define FGROUPOFPARTICLES_HPP
#include "../../Utils/FGlobal.hpp"
#include "../../Utils/FAssert.hpp"
#include "../../Containers/FTreeCoordinate.hpp"
#include "../../Utils/FAlignedMemory.hpp"
......@@ -44,7 +45,7 @@ class FGroupOfParticles {
protected:
static const int MemoryAlignementBytes = 32;
static const int MemoryAlignementBytes = FP2PDefaultAlignement;
static const int MemoryAlignementParticles = MemoryAlignementBytes/sizeof(FReal);
/** This function return the correct number of particles that should be used to have a correct pack.
......@@ -149,7 +150,7 @@ public:
// Allocate
FAssertLF(0 <= int(memoryToAlloc) && int(memoryToAlloc) < std::numeric_limits<int>::max());
allocatedMemoryInByte = memoryToAlloc;
memoryBuffer = (unsigned char*)FAlignedMemory::AllocateByte<32>(memoryToAlloc);
memoryBuffer = (unsigned char*)FAlignedMemory::AllocateBytes<MemoryAlignementBytes>(memoryToAlloc);
FAssertLF(memoryBuffer);
memset(memoryBuffer, 0, memoryToAlloc);
......@@ -181,7 +182,7 @@ public:
symAttributes += blockHeader->nbParticlesAllocatedInGroup;
}
attributesBuffer = (AttributeClass*)FAlignedMemory::AllocateByte<32>(blockHeader->attributeOffset*NbAttributesPerParticle);
attributesBuffer = (AttributeClass*)FAlignedMemory::AllocateBytes<MemoryAlignementBytes>(blockHeader->attributeOffset*NbAttributesPerParticle);
memset(attributesBuffer, 0, blockHeader->attributeOffset*NbAttributesPerParticle);
for(unsigned idxAttribute = 0 ; idxAttribute < NbAttributesPerParticle ; ++idxAttribute){
particleAttributes[idxAttribute+NbSymbAttributes] = &attributesBuffer[idxAttribute*nbParticlesAllocatedInGroup];
......
......@@ -761,12 +761,12 @@ protected:
{
if(remoteCellGroups[idxLevel][idxOtherGroup].ptrSymb == nullptr){
const int nbBytesInBlockSymb = processesBlockInfos[idxLevel][idxOtherGroup].bufferSizeSymb;
unsigned char* memoryBlockSymb = (unsigned char*)FAlignedMemory::AllocateByte<32>(nbBytesInBlockSymb);
unsigned char* memoryBlockSymb = (unsigned char*)FAlignedMemory::AllocateBytes<32>(nbBytesInBlockSymb);
remoteCellGroups[idxLevel][idxOtherGroup].ptrSymb = memoryBlockSymb;
starpu_variable_data_register(&remoteCellGroups[idxLevel][idxOtherGroup].handleSymb, 0,
(uintptr_t)remoteCellGroups[idxLevel][idxOtherGroup].ptrSymb, nbBytesInBlockSymb);
const int nbBytesInBlockUp = processesBlockInfos[idxLevel][idxOtherGroup].bufferSizeUp;
unsigned char* memoryBlockUp = (unsigned char*)FAlignedMemory::AllocateByte<32>(nbBytesInBlockUp);
unsigned char* memoryBlockUp = (unsigned char*)FAlignedMemory::AllocateBytes<32>(nbBytesInBlockUp);
remoteCellGroups[idxLevel][idxOtherGroup].ptrUp = memoryBlockUp;
starpu_variable_data_register(&remoteCellGroups[idxLevel][idxOtherGroup].handleUp, 0,
(uintptr_t)remoteCellGroups[idxLevel][idxOtherGroup].ptrUp, nbBytesInBlockUp);
......@@ -864,7 +864,7 @@ protected:
{
if(remoteParticleGroupss[idxOtherGroup].ptrSymb == nullptr){
const int nbBytesInBlock = processesBlockInfos[tree->getHeight()-1][idxOtherGroup].leavesBufferSize;
unsigned char* memoryBlock = (unsigned char*)FAlignedMemory::AllocateByte<32>(nbBytesInBlock);
unsigned char* memoryBlock = (unsigned char*)FAlignedMemory::AllocateBytes<32>(nbBytesInBlock);
remoteParticleGroupss[idxOtherGroup].ptrSymb = memoryBlock;
starpu_variable_data_register(&remoteParticleGroupss[idxOtherGroup].handleSymb, 0,
(uintptr_t)remoteParticleGroupss[idxOtherGroup].ptrSymb, nbBytesInBlock);
......@@ -1041,12 +1041,12 @@ protected:
if(remoteCellGroups[idxLevel][idxHandle].ptrSymb){
starpu_data_unregister(remoteCellGroups[idxLevel][idxHandle].handleSymb);
starpu_data_unregister(remoteCellGroups[idxLevel][idxHandle].handleUp);
FAlignedMemory::DeallocByte(remoteCellGroups[idxLevel][idxHandle].ptrSymb);
FAlignedMemory::DeallocByte(remoteCellGroups[idxLevel][idxHandle].ptrUp);
FAlignedMemory::DeallocBytes(remoteCellGroups[idxLevel][idxHandle].ptrSymb);
FAlignedMemory::DeallocBytes(remoteCellGroups[idxLevel][idxHandle].ptrUp);
if(remoteCellGroups[idxLevel][idxHandle].ptrDown){
starpu_data_unregister(remoteCellGroups[idxLevel][idxHandle].handleDown);
FAlignedMemory::DeallocByte(remoteCellGroups[idxLevel][idxHandle].ptrDown);
FAlignedMemory::DeallocBytes(remoteCellGroups[idxLevel][idxHandle].ptrDown);
}
}
}
......@@ -1056,7 +1056,7 @@ protected:
for(int idxHandle = 0 ; idxHandle < int(remoteParticleGroupss.size()) ; ++idxHandle){
if(remoteParticleGroupss[idxHandle].ptrSymb){
starpu_data_unregister(remoteParticleGroupss[idxHandle].handleSymb);
FAlignedMemory::DeallocByte(remoteParticleGroupss[idxHandle].ptrSymb);
FAlignedMemory::DeallocBytes(remoteParticleGroupss[idxHandle].ptrSymb);
}
}
remoteParticleGroupss.clear();
......@@ -1366,13 +1366,13 @@ protected:
if(remoteCellGroups[idxLevel+1][firstOtherBlock + idxBlockToRecv].ptrSymb == nullptr){
const int nbBytesInBlockSymb = processesBlockInfos[idxLevel+1][firstOtherBlock + idxBlockToRecv].bufferSizeSymb;
unsigned char* memoryBlockSymb = (unsigned char*)FAlignedMemory::AllocateByte<32>(nbBytesInBlockSymb);
unsigned char* memoryBlockSymb = (unsigned char*)FAlignedMemory::AllocateBytes<32>(nbBytesInBlockSymb);
remoteCellGroups[idxLevel+1][firstOtherBlock + idxBlockToRecv].ptrSymb = memoryBlockSymb;
starpu_variable_data_register(&remoteCellGroups[idxLevel+1][firstOtherBlock + idxBlockToRecv].handleSymb, 0,
(uintptr_t)remoteCellGroups[idxLevel+1][firstOtherBlock + idxBlockToRecv].ptrSymb, nbBytesInBlockSymb);
const int nbBytesInBlockUp = processesBlockInfos[idxLevel+1][firstOtherBlock + idxBlockToRecv].bufferSizeUp;
unsigned char* memoryBlockUp = (unsigned char*)FAlignedMemory::AllocateByte<32>(nbBytesInBlockUp);
unsigned char* memoryBlockUp = (unsigned char*)FAlignedMemory::AllocateBytes<32>(nbBytesInBlockUp);
remoteCellGroups[idxLevel+1][firstOtherBlock + idxBlockToRecv].ptrUp = memoryBlockUp;
starpu_variable_data_register(&remoteCellGroups[idxLevel+1][firstOtherBlock + idxBlockToRecv].handleUp, 0,
(uintptr_t)remoteCellGroups[idxLevel+1][firstOtherBlock + idxBlockToRecv].ptrUp, nbBytesInBlockUp);
......@@ -1608,14 +1608,14 @@ protected:
if(remoteCellGroups[idxLevel][firstOtherBlock].ptrSymb == nullptr){
const int nbBytesInBlock = processesBlockInfos[idxLevel][firstOtherBlock].bufferSizeSymb;
unsigned char* memoryBlock = (unsigned char*)FAlignedMemory::AllocateByte<32>(nbBytesInBlock);
unsigned char* memoryBlock = (unsigned char*)FAlignedMemory::AllocateBytes<32>(nbBytesInBlock);
remoteCellGroups[idxLevel][firstOtherBlock].ptrSymb = memoryBlock;
starpu_variable_data_register(&remoteCellGroups[idxLevel][firstOtherBlock].handleSymb, 0,
(uintptr_t)remoteCellGroups[idxLevel][firstOtherBlock].ptrSymb, nbBytesInBlock);
}
if(remoteCellGroups[idxLevel][firstOtherBlock].ptrDown == nullptr){
const int nbBytesInBlock = processesBlockInfos[idxLevel][firstOtherBlock].bufferSizeDown;
unsigned char* memoryBlock = (unsigned char*)FAlignedMemory::AllocateByte<32>(nbBytesInBlock);
unsigned char* memoryBlock = (unsigned char*)FAlignedMemory::AllocateBytes<32>(nbBytesInBlock);
remoteCellGroups[idxLevel][firstOtherBlock].ptrDown = memoryBlock;
starpu_variable_data_register(&remoteCellGroups[idxLevel][firstOtherBlock].handleDown, 0,
(uintptr_t)remoteCellGroups[idxLevel][firstOtherBlock].ptrDown, nbBytesInBlock);
......
......@@ -3,6 +3,7 @@
#define FCUDAGROUPOFPARTICLES_HPP
#include "FCudaGlobal.hpp"
#include "../../Utils/FGlobal.hpp"
#include "../StarPUUtils/FStarPUDefaultAlign.hpp"
template <unsigned NbSymbAttributes, unsigned NbAttributesPerParticle, class AttributeClass = FReal>
......@@ -32,7 +33,7 @@ class FCudaGroupOfParticles {
protected:
static const int MemoryAlignementBytes = 32;
static const int MemoryAlignementBytes = FP2PDefaultAlignement;
static const int MemoryAlignementParticles = MemoryAlignementBytes/sizeof(FReal);
/** This function return the correct number of particles that should be used to have a correct pack.
......
......@@ -16,7 +16,7 @@ typedef long long int MortonIndex;
#define NbAttributesPerParticle ___NbAttributesPerParticle___
#define NbSymbAttributes ___NbSymbAttributes___
#define FOpenCLGroupOfParticlesMemoryAlignementBytes 32
#define FOpenCLGroupOfParticlesMemoryAlignementBytes ___FP2PDefaultAlignement___
#define FOpenCLGroupOfParticlesMemoryAlignementParticles (FOpenCLGroupOfParticlesMemoryAlignementBytes/sizeof(FReal))
#define FOpenCLGroupOfParticlesLeafIsEmptyFlag ((MortonIndex)-1)
......
......@@ -20,6 +20,7 @@ public:
kernelfile.replaceAll("___NbAttributesPerParticle___", 1);
const size_t structAlign = FStarPUDefaultAlign::StructAlign;
kernelfile.replaceAll("___DefaultStructAlign___", structAlign);
kernelfile.replaceAll("___FP2PDefaultAlignement___", FP2PDefaultAlignement);
dim = 1;
}
......
......@@ -106,5 +106,13 @@ typedef long long MortonIndex;
#endif
#endif
///////////////////////////////////////////////////////
// Default P2P Alignement
///////////////////////////////////////////////////////
// Memory alignment, in bytes, shared by the particle containers: it is the
// template argument given to FAlignedMemory::AllocateBytes<> and is divided
// by sizeof(FReal) to obtain the particle-count rounding granularity.
// The same value is substituted for the ___FP2PDefaultAlignement___
// placeholder in the OpenCL kernel source.
// NOTE(review): the containers round sizes with
// `(x + m - 1) & ~(m - 1)` where m = FP2PDefaultAlignement/sizeof(FReal),
// which is only a valid round-up when m is a power of two -- keep this
// value a power-of-two multiple of sizeof(FReal).
static const int FP2PDefaultAlignement = 64;
#endif //FGLOBAL_HPP
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment