From 013fed02d9c09ae9828a05e6523a7df0d1004c4a Mon Sep 17 00:00:00 2001
From: bramas <berenger.bramas@inria.fr>
Date: Wed, 2 Jul 2014 14:59:05 +0200
Subject: [PATCH] use aligned memory for the blocked tree and the particles
 container

---
 Src/GroupTree/FGroupOfParticles.hpp | 57 +++++++++++++++++++++--------
 Src/GroupTree/FGroupTree.hpp        | 15 +++-----
 2 files changed, 47 insertions(+), 25 deletions(-)

diff --git a/Src/GroupTree/FGroupOfParticles.hpp b/Src/GroupTree/FGroupOfParticles.hpp
index 8ecbe231c..b575b46b7 100644
--- a/Src/GroupTree/FGroupOfParticles.hpp
+++ b/Src/GroupTree/FGroupOfParticles.hpp
@@ -4,6 +4,7 @@
 
 #include "../Utils/FAssert.hpp"
 #include "../Containers/FTreeCoordinate.hpp"
+#include "../Utils/FAlignedMemory.hpp"
 
 #include <list>
 #include <functional>
@@ -29,6 +30,22 @@ class FGroupOfParticles {
     };
 
 protected:
+    static const int MemoryAlignementBytes     = 32;
+    static const int MemoryAlignementParticles = MemoryAlignementBytes/sizeof(FReal);
+
+    /** Round a particle count/offset up to the next multiple of MemoryAlignementParticles; a value
+     * that is already a multiple is returned unchanged, so a leaf never needs more than
+     * (MemoryAlignementParticles-1) padding slots, which is what the constructor reserves per leaf.
+     * With 32-byte alignment and double (4 per pack): 1 -> 4, 63 -> 64, 64 -> 64, 0 -> 0.
+     */
+    template <class NumClass>
+    static NumClass RoundToUpperParticles(const NumClass& nbParticles){
+        return nbParticles + (MemoryAlignementParticles - (nbParticles%MemoryAlignementParticles))%MemoryAlignementParticles;
+    }
+
+    //< Flag value marking a leaf as not used
+    static const int LeafIsEmptyFlag = -1;
+
     //< Pointer to a block memory
     unsigned char* memoryBuffer;
 
@@ -40,6 +57,8 @@ protected:
     LeafHeader*     leafHeader;
     //< The total number of particles in the group
     const int nbParticlesInGroup;
+    //< Number of particle slots actually allocated (nbParticlesInGroup plus alignment padding)
+    int nbParticlesAllocatedInGroup;
 
     //< Pointers to particle position x, y, z
     FReal* particlePosition[3];
@@ -51,9 +70,6 @@ protected:
     //< Bytes difference/offset between attributes
     size_t attributeOffset;
 
-    //< This value is for not used leaves
-    static const int LeafIsEmptyFlag = -1;
-
 public:
     /**
  * @brief FGroupOfParticles
@@ -63,20 +79,24 @@ public:
  */
     FGroupOfParticles(const MortonIndex inStartingIndex, const MortonIndex inEndingIndex, const int inNumberOfLeaves, const int inNbParticles)
         : memoryBuffer(nullptr), blockHeader(nullptr), blockIndexesTable(nullptr), leafHeader(nullptr), nbParticlesInGroup(inNbParticles),
-          positionOffset(0), attributeOffset(0) {
+          nbParticlesAllocatedInGroup(0), positionOffset(0), attributeOffset(0) {
         memset(particlePosition, 0, sizeof(particlePosition));
         memset(particleAttributes, 0, sizeof(particleAttributes));
 
+        nbParticlesAllocatedInGroup = RoundToUpperParticles(nbParticlesInGroup+(MemoryAlignementParticles-1)*inNumberOfLeaves);
+
         // Find the number of leaf to allocate in the blocks
         const int blockIndexesTableSize = int(inEndingIndex-inStartingIndex);
         FAssertLF(inNumberOfLeaves <= blockIndexesTableSize);
         // Total number of bytes in the block
         const size_t sizeOfOneParticle = (3*sizeof(FReal) + NbAttributesPerParticle*sizeof(AttributeClass));
-        const size_t memoryToAlloc = sizeof(BlockHeader) + (blockIndexesTableSize*sizeof(int)) + (inNumberOfLeaves*sizeof(LeafHeader))
-                + inNbParticles*sizeOfOneParticle;
+        const size_t memoryToAlloc = sizeof(BlockHeader)
+                                    + (blockIndexesTableSize*sizeof(int))
+                                    + (inNumberOfLeaves*sizeof(LeafHeader))
+                                    + nbParticlesAllocatedInGroup*sizeOfOneParticle;
 
         // Allocate
-        memoryBuffer = new unsigned char[memoryToAlloc];
+        memoryBuffer = (unsigned char*)FAlignedMemory::Allocate32BAligned(memoryToAlloc);
         FAssertLF(memoryBuffer);
         memset(memoryBuffer, 0, memoryToAlloc);
 
@@ -92,17 +112,18 @@ public:
         blockHeader->blockIndexesTableSize = blockIndexesTableSize;
 
         // Init particle pointers
-        positionOffset = (sizeof(FReal) * inNbParticles);
-        particlePosition[0] = reinterpret_cast<FReal*>(leafHeader + inNumberOfLeaves);
-        particlePosition[1] = (particlePosition[0] + inNbParticles);
-        particlePosition[2] = (particlePosition[1] + inNbParticles);
+        positionOffset = (sizeof(FReal) * nbParticlesAllocatedInGroup);
+        particlePosition[0] = reinterpret_cast<FReal*>((reinterpret_cast<size_t>(leafHeader + inNumberOfLeaves)
+                                                       +MemoryAlignementBytes-1) & ~(MemoryAlignementBytes-1));
+        particlePosition[1] = (particlePosition[0] + nbParticlesAllocatedInGroup);
+        particlePosition[2] = (particlePosition[1] + nbParticlesAllocatedInGroup);
 
         // Redirect pointer to data
-        attributeOffset = (sizeof(AttributeClass) * inNbParticles);
-        unsigned char* previousPointer = reinterpret_cast<unsigned char*>(particlePosition[2] + inNbParticles);
+        attributeOffset = (sizeof(AttributeClass) * nbParticlesAllocatedInGroup);
+        unsigned char* previousPointer = reinterpret_cast<unsigned char*>(particlePosition[2] + nbParticlesAllocatedInGroup);
         for(unsigned idxAttribute = 0 ; idxAttribute < NbAttributesPerParticle ; ++idxAttribute){
             particleAttributes[idxAttribute] = reinterpret_cast<AttributeClass*>(previousPointer);
-            previousPointer += sizeof(AttributeClass)*inNbParticles;
+            previousPointer += sizeof(AttributeClass)*nbParticlesAllocatedInGroup;
         }
 
         // Set all index to not used
@@ -113,7 +134,7 @@ public:
 
     /** Call the destructor of leaves and dealloc block memory */
     ~FGroupOfParticles(){
-        delete[] memoryBuffer;
+        FAlignedMemory::Dealloc32BAligned(memoryBuffer);
     }
 
     /** The index of the fist leaf (set from the constructor) */
@@ -152,13 +173,17 @@ public:
     }
 
     /** Allocate a new leaf by calling its constructor */
-    void newLeaf(const MortonIndex inIndex, const int id, const int nbParticles, const size_t offsetInGroup){
+    size_t newLeaf(const MortonIndex inIndex, const int id, const int nbParticles, const size_t offsetInGroup){
         FAssertLF(isInside(inIndex));
         FAssertLF(!exists(inIndex));
         FAssertLF(id < blockHeader->blockIndexesTableSize);
         blockIndexesTable[inIndex-blockHeader->startingIndex] = id;
         leafHeader[id].nbParticles = nbParticles;
         leafHeader[id].offSet = offsetInGroup;
+        // Return the rounded-up offset at which the next leaf must start; it has to fit in the allocated slots
+        const size_t nextLeafOffsetInGroup = RoundToUpperParticles(offsetInGroup+nbParticles);
+        FAssertLF(nextLeafOffsetInGroup <= size_t(nbParticlesAllocatedInGroup));
+        return nextLeafOffsetInGroup;
     }
 
     /** Iterate on each allocated leaves */
diff --git a/Src/GroupTree/FGroupTree.hpp b/Src/GroupTree/FGroupTree.hpp
index c128f8b90..579b3ad31 100644
--- a/Src/GroupTree/FGroupTree.hpp
+++ b/Src/GroupTree/FGroupTree.hpp
@@ -105,7 +105,7 @@ public:
 
                 // Initialize each cell of the block
                 int cellIdInBlock = 0;
-                int nbParticlesBeforeLeaf = 0;
+                size_t nbParticlesOffsetBeforeLeaf = 0;
                 while(cellIdInBlock != sizeOfBlock){
                     // Add cell
                     const CellClass*const oldNode = blockIteratorInOctree.getCurrentCell();
@@ -116,14 +116,13 @@ public:
                     newNode->setCoordinate(oldNode->getCoordinate());
 
                     // Add leaf
-                    newParticleBlock->newLeaf(oldNode->getMortonIndex(), cellIdInBlock,
+                    nbParticlesOffsetBeforeLeaf = newParticleBlock->newLeaf(oldNode->getMortonIndex(), cellIdInBlock,
                                               blockIteratorInOctree.getCurrentLeaf()->getSrc()->getNbParticles(),
-                                              nbParticlesBeforeLeaf);
+                                              nbParticlesOffsetBeforeLeaf);
 
                     BasicAttachedClass attachedLeaf = newParticleBlock->template getLeaf<BasicAttachedClass>(oldNode->getMortonIndex());
                     attachedLeaf.copyFromContainer(blockIteratorInOctree.getCurrentLeaf()->getSrc(), 0);
 
-                    nbParticlesBeforeLeaf += blockIteratorInOctree.getCurrentLeaf()->getSrc()->getNbParticles();
                     cellIdInBlock += 1;
                     blockIteratorInOctree.moveRight();
                 }
@@ -260,7 +259,7 @@ public:
                         sizeOfBlock, lastParticle-firstParticle);
 
                 // Init cells
-                int nbParticlesBeforeLeaf = 0;
+                size_t nbParticlesOffsetBeforeLeaf = 0;
                 for(int cellIdInBlock = 0; cellIdInBlock != sizeOfBlock ; ++cellIdInBlock){
                     newBlock->newCell(currentBlockIndexes[cellIdInBlock], cellIdInBlock);
 
@@ -271,16 +270,14 @@ public:
                     newNode->setCoordinate(coord);
 
                     // Add leaf
-                    newParticleBlock->newLeaf(currentBlockIndexes[cellIdInBlock], cellIdInBlock,
-                                              nbParticlesPerLeaf[cellIdInBlock], nbParticlesBeforeLeaf);
+                    nbParticlesOffsetBeforeLeaf = newParticleBlock->newLeaf(currentBlockIndexes[cellIdInBlock], cellIdInBlock,
+                                              nbParticlesPerLeaf[cellIdInBlock], nbParticlesOffsetBeforeLeaf);
 
                     BasicAttachedClass attachedLeaf = newParticleBlock->template getLeaf<BasicAttachedClass>(currentBlockIndexes[cellIdInBlock]);
                     // Copy each particle from the original position
                     for(int idxPart = 0 ; idxPart < nbParticlesPerLeaf[cellIdInBlock] ; ++idxPart){
                         attachedLeaf.setParticle(idxPart, particlesToSort[idxPart + firstParticle].originalIndex, inParticlesContainer);
                     }
-
-                    nbParticlesBeforeLeaf += nbParticlesPerLeaf[cellIdInBlock];
                 }
 
                 // Keep the block
-- 
GitLab