diff --git a/Src/Core/FFmmAlgorithmThreadProc.hpp b/Src/Core/FFmmAlgorithmThreadProc.hpp index 3202dca07d00b82985c5893ecec9b440a172b16e..0dd73950e572e3eebf61250e4af5f019628b4a47 100644 --- a/Src/Core/FFmmAlgorithmThreadProc.hpp +++ b/Src/Core/FFmmAlgorithmThreadProc.hpp @@ -804,25 +804,60 @@ public: FDEBUG( FDebug::Controller.write("\tStart Direct Pass\n").write(FDebug::Flush); ); FDEBUG(FTic counterTime); + // init + const int LeafIndex = OctreeHeight - 1; + const int SizeShape = 3*3*3; + int shapeLeaf[SizeShape]; + OctreeIterator* shapeArray[SizeShape]; + for(int idxShape = 0 ; idxShape < SizeShape ; ++idxShape){ + shapeLeaf[idxShape] = 0; + } + + // split data { OctreeIterator octreeIterator(tree); octreeIterator.gotoBottomLeft(); + // remove useless leafs for(int idxLeaf = 0 ; idxLeaf < this->leafLeft ; ++idxLeaf){ octreeIterator.moveRight(); } + // to store which shape for each leaf + int* const shapeType = new int [this->leafRight - this->leafLeft + 1]; + for(int idxLeaf = this->leafLeft ; idxLeaf <= this->leafRight ; ++idxLeaf){ iterArray[idxLeaf] = octreeIterator; + + const MortonIndex index = octreeIterator.getCurrentGlobalIndex(); + FTreeCoordinate coord; + coord.setPositionFromMorton(index, LeafIndex); + const int shape = (coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3); + shapeType[idxLeaf-this->leafLeft] = shape; + ++shapeLeaf[shape]; + octreeIterator.moveRight(); } + + // init iter array + int countShape[SizeShape]; + for(int idxShape = 0 ; idxShape < SizeShape ; ++idxShape){ + shapeArray[idxShape] = new OctreeIterator[shapeLeaf[idxShape]]; + countShape[idxShape] = 0; + } + + // store leafs + for(int idxLeaf = this->leafLeft ; idxLeaf <= this->leafRight ; ++idxLeaf){ + const int idxShape = shapeType[idxLeaf - this->leafLeft]; + shapeArray[idxShape][countShape[idxShape]++] = iterArray[idxLeaf]; + } + + delete[] shapeType; } FDEBUG(FTic computationCounter); - const int LeafIndex = OctreeHeight - 1; const int startIdx = this->leafLeft; - const int endIdx = this->leafRight + 1; #pragma omp parallel { @@ -830,17 +865,20 @@ public: // There is a maximum of 26 neighbors FList<ParticleClass*>* neighbors[26]; - #pragma omp for - for(int idxLeafs = startIdx ; idxLeafs < endIdx ; ++idxLeafs){ - myThreadkernels->L2P(iterArray[idxLeafs].getCurrentCell(), iterArray[idxLeafs].getCurrentListTargets()); - // need the current particles and neighbors particles - const int counter = tree->getLeafsNeighbors(neighbors, iterArray[idxLeafs].getCurrentGlobalIndex(),LeafIndex); - myThreadkernels->P2P( iterArray[idxLeafs].getCurrentListTargets(), iterArray[idxLeafs].getCurrentListSrc() , neighbors, counter); + for(int idxShape = 0 ; idxShape < SizeShape ; ++idxShape){ + const int leafAtThisShape = shapeLeaf[idxShape]; + + #pragma omp for + for(int idxLeafs = startIdx ; idxLeafs < leafAtThisShape ; ++idxLeafs){ + myThreadkernels->L2P(shapeArray[idxShape][idxLeafs].getCurrentCell(), shapeArray[idxShape][idxLeafs].getCurrentListTargets()); + // need the current particles and neighbors particles + const int counter = tree->getLeafsNeighbors(neighbors, shapeArray[idxShape][idxLeafs].getCurrentGlobalIndex(),LeafIndex); + myThreadkernels->P2P( shapeArray[idxShape][idxLeafs].getCurrentListTargets(), shapeArray[idxShape][idxLeafs].getCurrentListSrc() , neighbors, counter); + } } } FDEBUG(computationCounter.tac()); - FDEBUG( FDebug::Controller << "\tFinished (@Direct Pass (P2P) = " << counterTime.tacAndElapsed() << "s)\n" ); FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.elapsed() << " s\n" ); FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );