From 013fed02d9c09ae9828a05e6523a7df0d1004c4a Mon Sep 17 00:00:00 2001 From: bramas <berenger.bramas@inria.fr> Date: Wed, 2 Jul 2014 14:59:05 +0200 Subject: [PATCH] use aligned memory for the blocked tree and the particles container --- Src/GroupTree/FGroupOfParticles.hpp | 57 +++++++++++++++++++++-------- Src/GroupTree/FGroupTree.hpp | 15 +++----- 2 files changed, 47 insertions(+), 25 deletions(-) diff --git a/Src/GroupTree/FGroupOfParticles.hpp b/Src/GroupTree/FGroupOfParticles.hpp index 8ecbe231c..b575b46b7 100644 --- a/Src/GroupTree/FGroupOfParticles.hpp +++ b/Src/GroupTree/FGroupOfParticles.hpp @@ -4,6 +4,7 @@ #include "../Utils/FAssert.hpp" #include "../Containers/FTreeCoordinate.hpp" +#include "../Utils/FAlignedMemory.hpp" #include <list> #include <functional> @@ -29,6 +30,22 @@ class FGroupOfParticles { }; protected: + static const int MemoryAlignementBytes = 32; + static const int MemoryAlignementParticles = MemoryAlignementBytes/sizeof(FReal); + + /** This function returns the number of particles rounded up to a complete pack. 
+ * If the alignment is 32 bytes and FReal is double (so 4 particles per pack), then this function returns: + * RoundToUpperParticles(1) = 1 + 3 = 4 + * RoundToUpperParticles(63) = 63 + 1 = 64 (an exact multiple such as 64 is returned unchanged) + */ + template <class NumClass> + static NumClass RoundToUpperParticles(const NumClass& nbParticles){ + return nbParticles + (MemoryAlignementParticles - (nbParticles%MemoryAlignementParticles))%MemoryAlignementParticles; /* the outer modulo maps a full-pack remainder to 0 */ + } + + //< This value is for not used leaves + static const int LeafIsEmptyFlag = -1; + //< Pointer to a block memory unsigned char* memoryBuffer; @@ -40,6 +57,8 @@ protected: LeafHeader* leafHeader; //< The total number of particles in the group const int nbParticlesInGroup; + //< The real number of particles allocated + int nbParticlesAllocatedInGroup; //< Pointers to particle position x, y, z FReal* particlePosition[3]; @@ -51,9 +70,6 @@ protected: //< Bytes difference/offset between attributes size_t attributeOffset; - //< This value is for not used leaves - static const int LeafIsEmptyFlag = -1; - public: /** * @brief FGroupOfParticles @@ -63,20 +79,24 @@ public: */ FGroupOfParticles(const MortonIndex inStartingIndex, const MortonIndex inEndingIndex, const int inNumberOfLeaves, const int inNbParticles) : memoryBuffer(nullptr), blockHeader(nullptr), blockIndexesTable(nullptr), leafHeader(nullptr), nbParticlesInGroup(inNbParticles), - positionOffset(0), attributeOffset(0) { + nbParticlesAllocatedInGroup(0), positionOffset(0), attributeOffset(0) { memset(particlePosition, 0, sizeof(particlePosition)); memset(particleAttributes, 0, sizeof(particleAttributes)); + nbParticlesAllocatedInGroup = RoundToUpperParticles(nbParticlesInGroup+(MemoryAlignementParticles-1)*inNumberOfLeaves); + // Find the number of leaf to allocate in the blocks const int blockIndexesTableSize = int(inEndingIndex-inStartingIndex); FAssertLF(inNumberOfLeaves <= blockIndexesTableSize); // Total number of bytes in the block const size_t sizeOfOneParticle = (3*sizeof(FReal) + 
NbAttributesPerParticle*sizeof(AttributeClass)); - const size_t memoryToAlloc = sizeof(BlockHeader) + (blockIndexesTableSize*sizeof(int)) + (inNumberOfLeaves*sizeof(LeafHeader)) - + inNbParticles*sizeOfOneParticle; + const size_t memoryToAlloc = sizeof(BlockHeader) + + (blockIndexesTableSize*sizeof(int)) + + (inNumberOfLeaves*sizeof(LeafHeader)) + + nbParticlesAllocatedInGroup*sizeOfOneParticle + (MemoryAlignementBytes-1); /* slack: particlePosition[0] is rounded up to the alignment boundary below */ // Allocate - memoryBuffer = new unsigned char[memoryToAlloc]; + memoryBuffer = (unsigned char*)FAlignedMemory::Allocate32BAligned(memoryToAlloc); FAssertLF(memoryBuffer); memset(memoryBuffer, 0, memoryToAlloc); @@ -92,17 +112,18 @@ public: blockHeader->blockIndexesTableSize = blockIndexesTableSize; // Init particle pointers - positionOffset = (sizeof(FReal) * inNbParticles); - particlePosition[0] = reinterpret_cast<FReal*>(leafHeader + inNumberOfLeaves); - particlePosition[1] = (particlePosition[0] + inNbParticles); - particlePosition[2] = (particlePosition[1] + inNbParticles); + positionOffset = (sizeof(FReal) * nbParticlesAllocatedInGroup); + particlePosition[0] = reinterpret_cast<FReal*>((reinterpret_cast<size_t>(leafHeader + inNumberOfLeaves) + +MemoryAlignementBytes-1) & ~(MemoryAlignementBytes-1)); /* address just past the leaf headers, rounded up to a multiple of MemoryAlignementBytes */ + particlePosition[1] = (particlePosition[0] + nbParticlesAllocatedInGroup); + particlePosition[2] = (particlePosition[1] + nbParticlesAllocatedInGroup); // Redirect pointer to data - attributeOffset = (sizeof(AttributeClass) * inNbParticles); - unsigned char* previousPointer = reinterpret_cast<unsigned char*>(particlePosition[2] + inNbParticles); + attributeOffset = (sizeof(AttributeClass) * nbParticlesAllocatedInGroup); + unsigned char* previousPointer = reinterpret_cast<unsigned char*>(particlePosition[2] + nbParticlesAllocatedInGroup); for(unsigned idxAttribute = 0 ; idxAttribute < NbAttributesPerParticle ; ++idxAttribute){ particleAttributes[idxAttribute] = reinterpret_cast<AttributeClass*>(previousPointer); - previousPointer += sizeof(AttributeClass)*inNbParticles; + 
previousPointer += sizeof(AttributeClass)*nbParticlesAllocatedInGroup; } // Set all index to not used @@ -113,7 +134,7 @@ public: /** Call the destructor of leaves and dealloc block memory */ ~FGroupOfParticles(){ - delete[] memoryBuffer; + FAlignedMemory::Dealloc32BAligned(memoryBuffer); /* the buffer is obtained from FAlignedMemory::Allocate32BAligned, so it is released through the matching call */ } /** The index of the fist leaf (set from the constructor) */ @@ -152,13 +173,17 @@ public: } /** Allocate a new leaf by calling its constructor */ - void newLeaf(const MortonIndex inIndex, const int id, const int nbParticles, const size_t offsetInGroup){ + size_t newLeaf(const MortonIndex inIndex, const int id, const int nbParticles, const size_t offsetInGroup){ /* returns the offset, counted in particles, to pass as offsetInGroup for the next leaf */ FAssertLF(isInside(inIndex)); FAssertLF(!exists(inIndex)); FAssertLF(id < blockHeader->blockIndexesTableSize); blockIndexesTable[inIndex-blockHeader->startingIndex] = id; leafHeader[id].nbParticles = nbParticles; leafHeader[id].offSet = offsetInGroup; + + const size_t nextLeafOffsetInGroup = RoundToUpperParticles(offsetInGroup+nbParticles); /* end of this leaf, rounded by RoundToUpperParticles */ + FAssertLF(nextLeafOffsetInGroup <= size_t(nbParticlesAllocatedInGroup)); /* the rounded end must stay within the allocated particle count */ + return nextLeafOffsetInGroup; } /** Iterate on each allocated leaves */ diff --git a/Src/GroupTree/FGroupTree.hpp b/Src/GroupTree/FGroupTree.hpp index c128f8b90..579b3ad31 100644 --- a/Src/GroupTree/FGroupTree.hpp +++ b/Src/GroupTree/FGroupTree.hpp @@ -105,7 +105,7 @@ public: // Initialize each cell of the block int cellIdInBlock = 0; - int nbParticlesBeforeLeaf = 0; + size_t nbParticlesOffsetBeforeLeaf = 0; while(cellIdInBlock != sizeOfBlock){ // Add cell const CellClass*const oldNode = blockIteratorInOctree.getCurrentCell(); @@ -116,14 +116,13 @@ public: newNode->setCoordinate(oldNode->getCoordinate()); // Add leaf - newParticleBlock->newLeaf(oldNode->getMortonIndex(), cellIdInBlock, + nbParticlesOffsetBeforeLeaf = newParticleBlock->newLeaf(oldNode->getMortonIndex(), cellIdInBlock, blockIteratorInOctree.getCurrentLeaf()->getSrc()->getNbParticles(), - nbParticlesBeforeLeaf); + nbParticlesOffsetBeforeLeaf); 
BasicAttachedClass attachedLeaf = newParticleBlock->template getLeaf<BasicAttachedClass>(oldNode->getMortonIndex()); attachedLeaf.copyFromContainer(blockIteratorInOctree.getCurrentLeaf()->getSrc(), 0); - nbParticlesBeforeLeaf += blockIteratorInOctree.getCurrentLeaf()->getSrc()->getNbParticles(); cellIdInBlock += 1; blockIteratorInOctree.moveRight(); } @@ -260,7 +259,7 @@ public: sizeOfBlock, lastParticle-firstParticle); // Init cells - int nbParticlesBeforeLeaf = 0; + size_t nbParticlesOffsetBeforeLeaf = 0; for(int cellIdInBlock = 0; cellIdInBlock != sizeOfBlock ; ++cellIdInBlock){ newBlock->newCell(currentBlockIndexes[cellIdInBlock], cellIdInBlock); @@ -271,16 +270,14 @@ public: newNode->setCoordinate(coord); // Add leaf - newParticleBlock->newLeaf(currentBlockIndexes[cellIdInBlock], cellIdInBlock, - nbParticlesPerLeaf[cellIdInBlock], nbParticlesBeforeLeaf); + nbParticlesOffsetBeforeLeaf = newParticleBlock->newLeaf(currentBlockIndexes[cellIdInBlock], cellIdInBlock, + nbParticlesPerLeaf[cellIdInBlock], nbParticlesOffsetBeforeLeaf); BasicAttachedClass attachedLeaf = newParticleBlock->template getLeaf<BasicAttachedClass>(currentBlockIndexes[cellIdInBlock]); // Copy each particle from the original position for(int idxPart = 0 ; idxPart < nbParticlesPerLeaf[cellIdInBlock] ; ++idxPart){ attachedLeaf.setParticle(idxPart, particlesToSort[idxPart + firstParticle].originalIndex, inParticlesContainer); } - - nbParticlesBeforeLeaf += nbParticlesPerLeaf[cellIdInBlock]; } // Keep the block -- GitLab