From 790b64401aac25c2f5dc8e64ea8c7a5b7bd231b0 Mon Sep 17 00:00:00 2001 From: Olivier Coulaud <Olivier.Coulaud@inria.fr> Date: Tue, 5 Jun 2018 17:06:10 +0200 Subject: [PATCH] Bug with BPC is fixed --- Examples/MPIInterpolationFMM.hpp | 23 +- Examples/sharedMemoryInterpolationFMM.hpp | 5 +- NEWS.txt | 2 +- README.md | 11 +- Src/Core/FFmmAlgorithmPeriodic.hpp | 54 ++- Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp | 446 ++++++++++++------- 6 files changed, 338 insertions(+), 203 deletions(-) diff --git a/Examples/MPIInterpolationFMM.hpp b/Examples/MPIInterpolationFMM.hpp index 27dffbf3a..e120c085f 100644 --- a/Examples/MPIInterpolationFMM.hpp +++ b/Examples/MPIInterpolationFMM.hpp @@ -32,7 +32,6 @@ #include "Utils/FParameters.hpp" #include "Utils/FParameterNames.hpp" - // // Order of the Interpolation approximation static constexpr unsigned ORDER = 6 ; @@ -115,6 +114,7 @@ int main(int argc, char* argv[]) // Initialize values for MPI FMpi app(argc,argv); + const bool masterIO = ( app.global().processId() == 0 ); // // Initialize timer FTic time; @@ -137,7 +137,7 @@ int main(int argc, char* argv[]) FSize localParticlesNumber = 0 ; // ----------------------------------------------------- - if(app.global().processId() == 0){ + if(masterIO){ std::cout << "Loading & Inserting " << loader.getNumberOfParticles() << " particles ..." << std::endl <<" Box: "<< std::endl @@ -214,7 +214,7 @@ int main(int argc, char* argv[]) MPI_Reduce(&timeUsed,&minTime,1,MPI_DOUBLE,MPI_MIN,0,app.global().getComm()); MPI_Reduce(&timeUsed,&maxTime,1,MPI_DOUBLE,MPI_MAX,0,app.global().getComm()); - if(app.global().processId() == 0){ + if(masterIO){ std::cout << "readinsert-time-min:" << minTime << " readinsert-time-max:" << maxTime << std::endl; @@ -225,8 +225,9 @@ int main(int argc, char* argv[]) FAbstractAlgorithm * algorithm = nullptr; FAlgorithmTimers * timer = nullptr; { // ----------------------------------------------------- - std::cout << "\n"<<interpolationType<<" FMM Proc (ORDER="<< ORDER << ") ... " << std::endl; - + if(masterIO) { + std::cout << "\n"<<interpolationType<<" FMM Proc (ORDER="<< ORDER << ") ... " << std::endl; + } time.tic(); // Kernels to use (pointer because of the limited size of the stack) @@ -239,8 +240,10 @@ int main(int argc, char* argv[]) // // periodic FMM algorithm FmmClassProcPER algoPer(app.global(),&tree, aboveTree); + KernelClass kernelsPer(algoPer.extendedTreeHeight(), algoPer.extendedBoxWidth(), algoPer.extendedBoxCenter(),&MatrixKernel); + algoPer.setKernel(&kernelsPer); /////////////////////////////////////////////////////////////////////////////////////////////////// if(! periodicCondition) {// Non periodic case @@ -252,12 +255,12 @@ int main(int argc, char* argv[]) timer = &algoPer ; } // - // FMM exectution FFmmFarField FFmmNearField + // FMM exectution FFmmFarField FFmmNearField FFmmP2M|FFmmM2M|FFmmM2L|FFmmL2L algorithm->execute(); // algorithm->getMortonLeafDistribution(mortonLeafDistribution); - // if(app.global().processId() == 0) + if(masterIO) { std::cout << app.global().processId() <<" Morton distribution "<< std::endl ; for(auto v : mortonLeafDistribution) @@ -268,10 +271,10 @@ int main(int argc, char* argv[]) app.global().barrier(); time.tac(); timeUsed = time.elapsed(); - std::cout << "Done " << "(@Algorithm = " << time.elapsed() << " s)." << std::endl; MPI_Reduce(&timeUsed,&minTime,1,MPI_DOUBLE,MPI_MIN,0,app.global().getComm()); MPI_Reduce(&timeUsed,&maxTime,1,MPI_DOUBLE,MPI_MAX,0,app.global().getComm()); - if(app.global().processId() == 0){ + if(masterIO){ + std::cout << "Done " << "(@Algorithm = " << time.elapsed() << " s)." << std::endl; std::cout << "exec-time-min:" << minTime << " exec-time-max:" << maxTime << std::endl; @@ -331,7 +334,7 @@ int main(int argc, char* argv[]) }); FReal gloEnergy = app.global().reduceSum(energy); FReal TotalPhysicalValue = app.global().reduceSum(locTotalPhysicalValue); - if(0 == app.global().processId()){ + if(masterIO){ std::cout <<std::endl<<"aboveRoot: " << aboveTree << " Energy: "<< gloEnergy <<" TotalPhysicalValue: " << TotalPhysicalValue<< std::endl; std::cout <<std::endl <<" &&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&& "<<std::endl<<std::endl; } diff --git a/Examples/sharedMemoryInterpolationFMM.hpp b/Examples/sharedMemoryInterpolationFMM.hpp index d597994b0..35eac6677 100644 --- a/Examples/sharedMemoryInterpolationFMM.hpp +++ b/Examples/sharedMemoryInterpolationFMM.hpp @@ -198,6 +198,7 @@ int main(int argc, char* argv[]) timer = &algoPer ; } + // algorithm->execute(); // Here the call of the FMM algorithm // @@ -250,10 +251,6 @@ int main(int argc, char* argv[]) << " Pos "<<posX[idxPart]<<" "<<posY[idxPart]<<" "<<posZ[idxPart] << " Forces: " << forcesX[idxPart] << " " << forcesY[idxPart] << " "<< forcesZ[idxPart] <<std::endl; } -// if(numLeaf==288){ -// std::cout << count << " idxPart " << idxPart << potentials[idxPart]<<" "<< -// physicalValues[idxPart] << " energy " << energy << std::endl; -// } energy += potentials[idxPart]*physicalValues[idxPart] ; TotalPhysicalValue += physicalValues[idxPart] ; // ++count ; diff --git a/NEWS.txt b/NEWS.txt index 719d4c8ca..c634a175a 100644 --- a/NEWS.txt +++ b/NEWS.txt @@ -19,7 +19,7 @@ version 1.1 - Improvement in CmakeLists - FUSE is working again - - Compile with Intel and CLang (with a lot of warnings) - Now morse_cmake is a git submodule -- +- Fix several bugs in pseudo-periodic algorithms (sequential and parallel) 1.5 diff --git a/README.md b/README.md index 743afd1e8..654ca2af1 100644 --- a/README.md +++ b/README.md @@ -35,8 +35,17 @@ To obtain ScalFMM (develop branch) and its git submodules do git clone --recursive git@gitlab.inria.fr:solverstack/ScalFMM.git -b develop ``` -``` bash +or + +```bash +git clone git@gitlab.inria.fr:solverstack/ScalFMM.git +cd ScalFMM +git submodule init +git submodule update + +``` # Move to the build folder +``` bash cd scalfmm/Build # Use cmake, with relevant options cmake .. # -DSCALFMM_USE_MPI=ON diff --git a/Src/Core/FFmmAlgorithmPeriodic.hpp b/Src/Core/FFmmAlgorithmPeriodic.hpp index 792036996..0aaaba787 100644 --- a/Src/Core/FFmmAlgorithmPeriodic.hpp +++ b/Src/Core/FFmmAlgorithmPeriodic.hpp @@ -198,12 +198,14 @@ protected: if(operationsToProceed & FFmmP2M) bottomPass(); - if(operationsToProceed & FFmmM2M) upwardPass(); + if(operationsToProceed & FFmmM2M) { + upwardPass(); + // before downward pass we have to perform the periodicity + this->processPeriodicLevels(); + } if(operationsToProceed & FFmmM2L){ transferPass(); - // before downward pass we have to perform the periodicity - processPeriodicLevels(); } if(operationsToProceed & FFmmL2L) downardPass(); @@ -561,10 +563,10 @@ protected: // upperCells[offsetRealTree-1] is root cell CellClass*const upperCells = new CellClass[offsetRealTree]; for(int i = 0; i < offsetRealTree; ++i) { - upperCells[i].setLevel(i+1); // TODO: check that the level is correct (see other todos under) + upperCells[i].setLevel(i+1); } - { + { // Build Expansion at level 0 typename OctreeClass::Iterator octreeIterator(tree); octreeIterator.gotoLeft(); @@ -584,7 +586,7 @@ protected: std::array<const symbolic_data_t*, 8> level_1_symbolics; std::transform(children, children+8, level_1_symbolics.begin(), [](CellClass* c) {return c;}); - // TODO check that parent symbolic has the right level + // kernels->M2M( real_tree_root_multipole, real_tree_root_symbolic, @@ -598,6 +600,7 @@ protected: // Copy virtual cell at given level into virtual children FMemUtils::setall(children,&upperCells[idxLevel],8); + multipole_t* const virtual_parent_multipole = &((&upperCells[idxLevel-1])->getMultipoleData()); const symbolic_data_t* const virtual_parent_symbolic @@ -618,7 +621,9 @@ protected: ); } } - + // + // M2L PASS + // CellClass*const downerCells = new CellClass[offsetRealTree]; for(int i = 0; i < offsetRealTree; ++i) { downerCells[i].setLevel(i+1); // TODO: check that the level is correct (see other todos under) @@ -627,8 +632,8 @@ protected: { const int idxUpperLevel = 2; - const CellClass* neighbors[342]; - int neighborPositions[342]; + const CellClass* neighbors[342]{}; + int neighborPositions[342]{}; int counter = 0; // The cells are all the same, duplicate the central cell and @@ -637,7 +642,7 @@ protected: for(int idxY = -3 ; idxY <= 2 ; ++idxY){ for(int idxZ = -3 ; idxZ <= 2 ; ++idxZ){ if( FMath::Abs(idxX) > 1 || FMath::Abs(idxY) > 1 || FMath::Abs(idxZ) > 1){ - neighbors[counter] = &upperCells[idxUpperLevel-1]; + neighbors[counter] = &upperCells[idxUpperLevel-1]; neighborPositions[counter] = neighIndex(idxX,idxY,idxZ); ++counter; } @@ -649,13 +654,13 @@ protected: = &(downerCells[idxUpperLevel-1].getLocalExpansionData()); const symbolic_data_t* const target_symbolic = &downerCells[idxUpperLevel-1]; - std::array<const multipole_t*, 342> neighbor_multipoles; - std::array<const symbolic_data_t*, 342> neighbor_symbolics; + + std::array<const multipole_t*, 342> neighbor_multipoles{}; + std::array<const symbolic_data_t*, 342> neighbor_symbolics{}; std::transform(neighbors, neighbors+counter, neighbor_multipoles.begin(), [](const CellClass* c) {return &(c->getMultipoleData());}); std::transform(neighbors, neighbors+counter, neighbor_symbolics.begin(), [](const CellClass* c) {return c;}); - // compute M2L kernels->M2L( target_local_exp, @@ -665,8 +670,11 @@ protected: neighborPositions, counter); } + + // note: the triple loop bounds are not the same than for the previous piece of code // which handles the topmost virtual cell + for(int idxUpperLevel = 3 ; idxUpperLevel <= offsetRealTree ; ++idxUpperLevel){ const CellClass* neighbors[342]; int neighborPositions[342]; @@ -675,7 +683,7 @@ protected: for(int idxY = -2 ; idxY <= 3 ; ++idxY){ for(int idxZ = -2 ; idxZ <= 3 ; ++idxZ){ if( FMath::Abs(idxX) > 1 || FMath::Abs(idxY) > 1 || FMath::Abs(idxZ) > 1){ - neighbors[counter] = &upperCells[idxUpperLevel-1]; + neighbors[counter] = &upperCells[idxUpperLevel-1]; neighborPositions[counter] = neighIndex(idxX,idxY,idxZ); ++counter; } @@ -687,6 +695,7 @@ protected: = &(downerCells[idxUpperLevel-1].getLocalExpansionData()); const symbolic_data_t* const target_symbolic = &downerCells[idxUpperLevel-1]; + std::array<const multipole_t*, 342> neighbor_multipoles; std::array<const symbolic_data_t*, 342> neighbor_symbolics; std::transform(neighbors, neighbors+counter, neighbor_multipoles.begin(), @@ -706,10 +715,11 @@ protected: neighborPositions, counter); } - - // Run the L2L for all but the lowest virtual levels + // 2 is the highest level + // offsetRealTree-1 is the root level { + std::array<local_expansion_t*, 8> virtual_child_local_exps = {}; std::array<symbolic_data_t*, 8> virtual_child_symbolics = {}; for(int idxLevel = 2 ; idxLevel < offsetRealTree-1 ; ++idxLevel){ @@ -732,16 +742,17 @@ protected: } } // Run the L2L for the lowest virtual level - { - std::array<local_expansion_t*, 8> virtual_child_local_exps = {}; - std::array<symbolic_data_t*, 8> virtual_child_symbolics = {}; + if(offsetRealTree >2){ + std::array<local_expansion_t*, 8> virtual_child_local_exps {}; + std::array<symbolic_data_t*, 8> virtual_child_symbolics {}; const int idxLevel = offsetRealTree-1; local_expansion_t* const virtual_parent_local_exp = &(downerCells[idxLevel-1].getLocalExpansionData()); const symbolic_data_t* const virtual_parent_symbolic = &downerCells[idxLevel-1]; + virtual_child_local_exps[7] = &(downerCells[idxLevel].getLocalExpansionData()); - virtual_child_symbolics[7] = &downerCells[idxLevel]; + virtual_child_symbolics[7] = &downerCells[idxLevel]; kernels->L2L( virtual_parent_local_exp, virtual_parent_symbolic, @@ -750,7 +761,7 @@ protected: ); } - // Run L2L from the lowest vitual level to the highest real tree level + // Run L2L from the lowest virtual level to the highest real tree level { typename OctreeClass::Iterator octreeIterator(tree); octreeIterator.gotoLeft(); @@ -777,7 +788,6 @@ protected: child_symbolics.data() ); } - delete[] upperCells; delete[] downerCells; } diff --git a/Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp b/Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp index 361037b44..cd39d40d5 100644 --- a/Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp +++ b/Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp @@ -60,27 +60,32 @@ */ template<class FReal, class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass, class P2PExclusionClass = FP2PMiddleExclusion> class FFmmAlgorithmThreadProcPeriodic : public FAbstractAlgorithm, public FAlgorithmTimers { - OctreeClass* const tree; ///< The octree to work on - KernelClass** kernels; ///< The kernels - const FMpi::FComm& comm; ///< MPI communicator + using multipole_t = typename CellClass::multipole_t; + using local_expansion_t = typename CellClass::local_expansion_t; + using symbolic_data_t = CellClass; - CellClass rootCellFromProc; ///< root of tree needed by the periodicity - const int nbLevelsAboveRoot; ///< The nb of level the user ask to go above the tree (>= -1) - const int offsetRealTree; ///< nbLevelsAboveRoot GetFakeLevel + OctreeClass* const tree; ///< The octree to work on + KernelClass** kernels; ///< The kernels + + const int OctreeHeight; ///< Tree height + const int nbLevelsAboveRoot; ///< The nb of level the user ask to go above the tree (>= -1) + const int offsetRealTree; ///< nbLevelsAboveRoot+2 GetFakeLevel + const int leafLevelSeparationCriteria; + CellClass rootCellFromProc; ///< root cell of tree needed by the periodicity typename OctreeClass::Iterator* iterArray; ///< Used to store pointers to cells/leafs to work with typename OctreeClass::Iterator* iterArrayComm; ///< Used to store pointers to cells/leafs to send/rcv + const FMpi::FComm& comm; ///< MPI communicator + int numberOfLeafs; ///< To store the size at the previous level const int MaxThreads; ///< Max number of thread allowed by openmp const int nbProcess; ///< Process count const int idProcess; ///< Current process id - const int OctreeHeight; ///< Tree height const int userChunkSize; - const int leafLevelSeparationCriteria; struct Interval{ MortonIndex leftIndex; @@ -94,10 +99,6 @@ class FFmmAlgorithmThreadProcPeriodic : public FAbstractAlgorithm, public FAlgor public: - using multipole_t = typename CellClass::multipole_t; - using local_expansion_t = typename CellClass::local_expansion_t; - using symbolic_data_t = CellClass; - /// Get the Morton index Distribution at the leaf level /// @@ -186,16 +187,16 @@ public: const int inLeafLevelSeperationCriteria = 1) : tree(inTree) , kernels(nullptr), - comm(inComm), + OctreeHeight(tree->getHeight()), nbLevelsAboveRoot(inUpperLevel), offsetRealTree(inUpperLevel + 2), + leafLevelSeparationCriteria(inLeafLevelSeperationCriteria), + comm(inComm), numberOfLeafs(0), MaxThreads(FEnv::GetValue("SCALFMM_ALGO_NUM_THREADS",omp_get_max_threads())), nbProcess(inComm.processCount()), idProcess(inComm.processId()), - OctreeHeight(tree->getHeight()), userChunkSize(inUserChunkSize), - leafLevelSeparationCriteria(inLeafLevelSeperationCriteria), intervals(new Interval[inComm.processCount()]), workingIntervalsPerLevel(new Interval[inComm.processCount() * tree->getHeight()]) { @@ -439,7 +440,9 @@ protected: Timers[L2LTimer].tac(); Timers[NearTimer].tic(); - if((operationsToProceed & FFmmP2P) || (operationsToProceed & FFmmL2P)) directPass((operationsToProceed & FFmmP2P),(operationsToProceed & FFmmL2P)); + if((operationsToProceed & FFmmP2P) || (operationsToProceed & FFmmL2P)) { + directPass((operationsToProceed & FFmmP2P),(operationsToProceed & FFmmL2P)); + } Timers[NearTimer].tac(); tree->forEachCell([this](CellClass* node){ @@ -504,7 +507,9 @@ protected: // Upward ///////////////////////////////////////////////////////////////////////////// - /** M2M */ + /** M2M + Build multipole expansion from height-1 to 1 +*/ void upwardPass(){ FLOG( FLog::Controller.write("\tStart Upward Pass\n").write(FLog::Flush); ); FLOG(FTic counterTime); @@ -782,102 +787,151 @@ protected: ////////////////////////////////////////////////////////////////// //Periodicity ////////////////////////////////////////////////////////////////// + /// + /// Build the mutipole and the local expansion ine the root cell + /// octreeIterator = typename OctreeClass::Iterator(tree); if( idProcess == 0){ + // Build request to receive the size of the data int iterRequestsSize = 0; - +// bool receiveData = false ; for(int idxProc = 1 ; idxProc < nbProcess ; ++idxProc ){ if( procHasWorkAtLevel(1,idxProc) ){ + // receiveData = true ; MPI_Irecv(&recvBufferSize[iterRequestsSize], 1, FMpi::GetType(recvBufferSize[iterRequestsSize]), idxProc, FMpi::TagFmmM2MSize, comm.getComm(), &requests[iterRequestsSize]); iterRequestsSize += 1; FAssertLF(iterRequestsSize <= 7); } } - MPI_Waitall( iterRequestsSize, requests, MPI_STATUSES_IGNORE); + //if(receiveData) + // { + // Build the rootCell Level if we don't have all data + MPI_Waitall( iterRequestsSize, requests, MPI_STATUSES_IGNORE); + // Build request to receive the data + + int iterRequests = 0; + + for(int idxProc = 1 ; idxProc < nbProcess ; ++idxProc ){ + if( procHasWorkAtLevel(1,idxProc) ){ + recvBuffer[iterRequests].cleanAndResize(recvBufferSize[iterRequests]); + FAssertLF(recvBufferSize[iterRequests] < std::numeric_limits<int>::max()); + MPI_Irecv(recvBuffer[iterRequests].data(), int(recvBufferSize[iterRequests]), MPI_BYTE, idxProc, + FMpi::TagFmmM2M, comm.getComm(), &requests[iterRequests]); + iterRequests += 1; + FAssertLF(iterRequests <= 7); + } + } - int iterRequests = 0; + MPI_Waitall( iterRequests, requests, MPI_STATUSES_IGNORE); - for(int idxProc = 1 ; idxProc < nbProcess ; ++idxProc ){ - if( procHasWorkAtLevel(1,idxProc) ){ - recvBuffer[iterRequests].cleanAndResize(recvBufferSize[iterRequests]); - FAssertLF(recvBufferSize[iterRequests] < std::numeric_limits<int>::max()); - MPI_Irecv(recvBuffer[iterRequests].data(), int(recvBufferSize[iterRequests]), MPI_BYTE, idxProc, - FMpi::TagFmmM2M, comm.getComm(), &requests[iterRequests]); - iterRequests += 1; - FAssertLF(iterRequests <= 7); - } - } + CellClass* currentChild[8]; + memcpy(currentChild, octreeIterator.getCurrentBox(), 8 * sizeof(CellClass*)); - MPI_Waitall( iterRequests, requests, MPI_STATUSES_IGNORE); + // retreive data and merge my child and the child from others + int counterProc = 0; + for(int idxProc = 1 ; idxProc < nbProcess ; ++idxProc){ + if( procHasWorkAtLevel(1,idxProc) ){ + recvBuffer[counterProc].seek(0); + unsigned state = unsigned(recvBuffer[counterProc].getValue<unsigned char>()); - CellClass* currentChild[8]; - memcpy(currentChild, octreeIterator.getCurrentBox(), 8 * sizeof(CellClass*)); + int position = 0; + int positionToInsert = 0; + while( state && position < 8){ + while(!(state & 0x1)){ + state >>= 1; + ++position; + } + FAssertLF(positionToInsert < 7); + FAssertLF(position < 8); + FAssertLF(!currentChild[position], "Already has a cell here"); - // retreive data and merge my child and the child from others - int counterProc = 0; - for(int idxProc = 1 ; idxProc < nbProcess ; ++idxProc){ - if( procHasWorkAtLevel(1,idxProc) ){ - recvBuffer[counterProc].seek(0); - unsigned state = unsigned(recvBuffer[counterProc].getValue<unsigned char>()); + recvBufferCells[positionToInsert].deserializeUp(recvBuffer[counterProc]); + + currentChild[position] = (CellClass*) &recvBufferCells[positionToInsert]; - int position = 0; - int positionToInsert = 0; - while( state && position < 8){ - while(!(state & 0x1)){ state >>= 1; - ++position; + position += 1; + positionToInsert += 1; } - FAssertLF(positionToInsert < 7); - FAssertLF(position < 8); - FAssertLF(!currentChild[position], "Already has a cell here"); - - recvBufferCells[positionToInsert].deserializeUp(recvBuffer[counterProc]); - currentChild[position] = (CellClass*) &recvBufferCells[positionToInsert]; - - state >>= 1; - position += 1; - positionToInsert += 1; + FAssertLF(recvBuffer[counterProc].tell() == recvBufferSize[counterProc]); + counterProc += 1; } - - FAssertLF(recvBuffer[counterProc].tell() == recvBufferSize[counterProc]); - counterProc += 1; } - } - - - multipole_t* parent_multipole - = &(rootCellFromProc.getMultipoleData()); - const symbolic_data_t* parent_symbolic - = &(rootCellFromProc); - - std::array<const multipole_t*, 8> child_multipoles {}; - std::transform(currentChild, currentChild + 8, - std::begin(child_multipoles), - [](const CellClass* c) { - return (c == nullptr) - ? nullptr - : &(c->getMultipoleData()); - }); - std::array<const symbolic_data_t*, 8> child_symbolics {}; - - - + // } + + +#ifdef USEQALGO + // set the reecived +// std::copy(currentChild, currentChild+8, +// std::begin(child_symbolics)); + typename OctreeClass::Iterator octreeIterator(tree); + octreeIterator.gotoLeft(); + +// multipole_t* const real_tree_root_multipole +// = &((&upperCells[offsetRealTree-1])->getMultipoleData()); +// const symbolic_data_t* const real_tree_root_symbolic +// = &(upperCells[offsetRealTree-1]); + + CellClass** children = octreeIterator.getCurrentBox(); + + std::transform(currentChild, currentChild + 8, + std::begin(children), + [](const CellClass* c) { + return (c == nullptr) + ? nullptr + : &(c->getMultipoleData()); + }); +// std::copy(currentChild, currentChild+8, +// std::begin(child_symbolics)); + // std::copy(currentChild, currentChild+8, +// std::begin(children)); + this->processPeriodicLevelsSEQ(); + +#else + // Build expansion at the rootCellFromProc + multipole_t* parent_multipole = &(rootCellFromProc.getMultipoleData()); + const symbolic_data_t* parent_symbolic = &(rootCellFromProc); + // + std::array<const multipole_t*, 8> child_multipoles {}; + std::transform(currentChild, currentChild + 8, + std::begin(child_multipoles), + [](const CellClass* c) { + return (c == nullptr) + ? nullptr + : &(c->getMultipoleData()); + }); + std::array<const symbolic_data_t*, 8> child_symbolics {}; + std::copy(currentChild, currentChild+8, + std::begin(child_symbolics)); + + /// + /// // (*kernels[0]).M2M( &rootCellFromProc , currentChild, offsetRealTree); + /// + (*kernels[0]).M2M( + parent_multipole, + parent_symbolic, + child_multipoles.data(), + child_symbolics.data() + ); - // (*kernels[0]).M2M( &rootCellFromProc , currentChild, offsetRealTree); - (*kernels[0]).M2M( - parent_multipole, - parent_symbolic, - child_multipoles.data(), - child_symbolics.data() - ); + /// Now we have the multipole on the root level - processPeriodicLevels(); + /// + /// Treat at root level the periodicity + /// + /// + /// Treat at root level the periodicity + /// + this->processPeriodicLevels(); +#endif } else { + // OTHER PROCESS (NOT 0) + // if( hasWorkAtLevel(1) ){ const int firstChild = getWorkingInterval(1, idProcess).leftIndex & 7; const int lastChild = getWorkingInterval(1, idProcess).rightIndex & 7; @@ -899,6 +953,7 @@ protected: MPI_Send(sendBuffer.data(), int(sendBuffer.getSize()), MPI_BYTE, 0, FMpi::TagFmmM2M, comm.getComm()); } } + /////////////////////////////////////////////////////////////////////////// } ///////////////////////////////////////////////////////////////////////////// @@ -1392,13 +1447,17 @@ protected: FAssertLF(recvBuffer.getSize() == sizeOfSerialization); recvBuffer.seek(0); } - + // all processors apply L2L(rootCellFromProc) and store the local expansion in child_local_expansions + // That is an alias of the local expansion n the octree at level 1 + // { const local_expansion_t* parent_local_expansion = &(rootCellFromProc.getLocalExpansionData()); const symbolic_data_t* parent_symbolic = &(rootCellFromProc); + std::cout << " >> L2L F rootCellFromProc -> Tree Level1" <<std::endl; + // In fact child_local_expansions[i] is an alias of &(children[i]->getLocalExpansionData()) CellClass ** children = octreeIterator.getCurrentBox(); std::array<local_expansion_t*, 8> child_local_expansions {}; std::transform(children, children + 8, @@ -1409,8 +1468,9 @@ protected: : &(c->getLocalExpansionData()); }); std::array<const symbolic_data_t*, 8> child_symbolics {}; - std::copy(children, children + 8, std::begin(child_symbolics)); - + // std::copy(children, children + 8, std::begin(child_symbolics)); + std::transform(children, children+8, child_symbolics.begin(), + [](CellClass* c) {return c;}); kernels[0]->L2L( parent_local_expansion, @@ -1970,18 +2030,18 @@ protected: for(int idxNeig = 0 ; idxNeig < counter ; ++idxNeig){ if( !offsets[idxNeig].equals(0,0,0) ){ // We need to move the cell // - // Put periodic cell into another cell (copy data) - periodicNeighbors[periodicNeighborsCounter] = new ContainerClass(*(neighbors[idxNeig])) ; + // Put periodic cell into another cell (copy data) + periodicNeighbors[periodicNeighborsCounter] = new ContainerClass(*(neighbors[idxNeig])) ; // newPos = pos + ofsset - FReal*const positionsX = periodicNeighbors[periodicNeighborsCounter]->getPositions()[0]; - FReal*const positionsY = periodicNeighbors[periodicNeighborsCounter]->getPositions()[1]; - FReal*const positionsZ = periodicNeighbors[periodicNeighborsCounter]->getPositions()[2]; + FReal*const positionsX = periodicNeighbors[periodicNeighborsCounter]->getPositions()[0]; + FReal*const positionsY = periodicNeighbors[periodicNeighborsCounter]->getPositions()[1]; + FReal*const positionsZ = periodicNeighbors[periodicNeighborsCounter]->getPositions()[2]; FReal xoffset= boxWidth * FReal(offsets[idxNeig].getX()) ; FReal yoffset= boxWidth * FReal(offsets[idxNeig].getY()) ; FReal zoffset= boxWidth * FReal(offsets[idxNeig].getZ()) ; - for(FSize idxPart = 0; idxPart < periodicNeighbors[periodicNeighborsCounter]->getNbParticles() ; ++idxPart){ - // std::cout << " Xo " << positionsX[idxPart] << " " << positionsY[idxPart] << " " << positionsZ[idxPart] <<std::endl; - // std::cout << " Offset " << xoffset << " " << yoffset << " " << zoffset <<std::endl; + for(FSize idxPart = 0; idxPart < periodicNeighbors[periodicNeighborsCounter]->getNbParticles() ; ++idxPart){ + // std::cout << " Xo " << positionsX[idxPart] << " " << positionsY[idxPart] << " " << positionsZ[idxPart] <<std::endl; + // std::cout << " Offset " << xoffset << " " << yoffset << " " << zoffset <<std::endl; positionsX[idxPart] += xoffset; positionsY[idxPart] += yoffset; @@ -1990,37 +2050,37 @@ protected: } // periodicOffsets[periodicNeighborsCounter] = offsets[idxNeig]; - periodicNeighborPositions[periodicNeighborsCounter] = neighborPositions[idxNeig]; - ++periodicNeighborsCounter; - } - else{ // set non periodic cells in neighbors and in neighborPositions - // nonPeriodicCounter versus idxNeig-periodicNeighborsCounter - // std::cout << nonPeriodicCounter <<" "<< idxNeig-periodicNeighborsCounter <<std::endl; - neighbors[idxNeig-periodicNeighborsCounter] = neighbors[idxNeig]; - neighborPositions[idxNeig-periodicNeighborsCounter] = neighborPositions[idxNeig]; - ++nonPeriodicCounter ; - } - } - // - // Treat periodic interaction without mutual interaction - // - myThreadkernels->P2PRemote(currentIter.coord,currentIter.targets, - currentIter.sources, periodicNeighbors, - periodicNeighborPositions, periodicNeighborsCounter); - - for(int idxNeig = 0 ; idxNeig < periodicNeighborsCounter ; ++idxNeig){ + periodicNeighborPositions[periodicNeighborsCounter] = neighborPositions[idxNeig]; + ++periodicNeighborsCounter; + } + else{ // set non periodic cells in neighbors and in neighborPositions + // nonPeriodicCounter versus idxNeig-periodicNeighborsCounter + // std::cout << nonPeriodicCounter <<" "<< idxNeig-periodicNeighborsCounter <<std::endl; + neighbors[idxNeig-periodicNeighborsCounter] = neighbors[idxNeig]; + neighborPositions[idxNeig-periodicNeighborsCounter] = neighborPositions[idxNeig]; + ++nonPeriodicCounter ; + } + } + // + // Treat periodic interaction without mutual interaction + // + myThreadkernels->P2PRemote(currentIter.coord,currentIter.targets, + currentIter.sources, periodicNeighbors, + periodicNeighborPositions, periodicNeighborsCounter); + + for(int idxNeig = 0 ; idxNeig < periodicNeighborsCounter ; ++idxNeig){ delete periodicNeighbors[idxNeig] ; } - } - // - // Now treat P2P interaction with non periodic cells - // Like in non periodic algorithm !! - // - myThreadkernels->P2P( currentIter.coord, currentIter.targets, - currentIter.sources, neighbors, - neighborPositions, counter-periodicNeighborsCounter); - } - } + } + // + // Now treat P2P interaction with non periodic cells + // Like in non periodic algorithm !! + // + myThreadkernels->P2P( currentIter.coord, currentIter.targets, + currentIter.sources, neighbors, + neighborPositions, counter-periodicNeighborsCounter); + } + } } } @@ -2390,14 +2450,29 @@ public: * Then the M2L by taking into account the periodicity directions * Then the border by using the precomputed M2M * Finally the L2L + * The algorithm above the root can be spited in 4 steps: + * begin{itemize} + * \item Upward pass, for level $0$ to $-d$ we compute the $2^{DIM}$ $M2M$ ($8$ per level in $3D$). + * \item Transfer pass, for level $0$ to $-d$ we compute the $6^{DIM}-3^{DIM}$ $M2L$ ($189$ per level in $3D$). + * For level $0$ to $-d+1$ we put the cell at position $(1,1,1)$ relatively to its parent. + * For the last $M2L$ at level $-d$ we put the cell at position $(0,0,0)$ relatively to its parent. + * \item Border pass, at level $-d-1$ we compute the border and apply it using $M2L$. + * \item Downward pass, from level $-d-1$ to $0$ we perform one $L2L$. + * There is only one sub-cell per $L2L$ and the position has to be the same as the one used during the Transfer pass. + * \end{itemize} + * */ + void processPeriodicLevels(){ FLOG( FLog::Controller.write("\tStart Periodic Pass\n").write(FLog::Flush); ); FLOG(FTic counterTime); + // the root level of the octree in the virtual array of cells + const int rootLevel = offsetRealTree-1 ; + rootCellFromProc.setLevel(rootLevel+1); if( nbLevelsAboveRoot != -1 ){ // we will use offsetRealTree-1 cells but for simplicity allocate offsetRealTree - // upperCells[offsetRealTree-1] is root cell + // upperCells[offsetRealTree-1] is the root cell std::unique_ptr<multipole_t[]> virtual_multipoles (new multipole_t[offsetRealTree] {}); @@ -2409,14 +2484,23 @@ public: for(int i = 0; i < offsetRealTree; ++i) { virtual_symbolics[i].setLevel(i+1); } - - { - const int idxLevel = offsetRealTree-1; - multipole_t* parent_multipole - = &(virtual_multipoles[idxLevel-1]); + ///////////////////////////////////////////////////////////////// + /// Build all multipoles in the fake tree + /// M2M operator + if(rootLevel > 1){ + //fill at rootLevel-1 + // Proceed as follows because I don(t know how to set + // virtual_symbolics[rootLevel]) = rootCellFromProc + // virtual_multipoles[rootLevel] = rootCellFromProc.getMultipoleData() + // + + multipole_t * parent_multipole + = &(virtual_multipoles[rootLevel-1]); symbolic_data_t* parent_symbolic - = &(virtual_symbolics[idxLevel-1]); + = &(virtual_symbolics[rootLevel-1]); + + // Fill children with root expansion std::array<const multipole_t*, 8> child_multipoles {}; std::fill(std::begin(child_multipoles), std::end(child_multipoles), &(rootCellFromProc.getMultipoleData())); @@ -2430,11 +2514,12 @@ public: child_multipoles.data(), child_symbolics.data() ); + } { - std::array<const multipole_t*, 8> child_multipoles {}; - std::array<const symbolic_data_t*, 8> child_symbolics {}; - for(int idxLevel = offsetRealTree-1-1 ; idxLevel > 1 ; --idxLevel){ + std::array<const multipole_t*, 8> child_multipoles {}; + std::array<const symbolic_data_t*, 8> child_symbolics {}; + for(int idxLevel = rootLevel -1 ; idxLevel > 1 ; --idxLevel){ multipole_t* parent_multipole = &(virtual_multipoles[idxLevel-1]); symbolic_data_t* parent_symbolic @@ -2451,27 +2536,38 @@ public: ); } } + ///////////////////////////////////////////////////////////////// + /// M2L + /// + /// For the highest level the cell is set at position (0,0,0) relatively to its parent + /// but for all other level the location is (1,1,1) (i.e index 7 in the height cells) + /// + /// - { - const int idxUpperLevel = 2; + if(rootLevel > 1){ + // Build a virtual environment for the M2L at the topmost level + const int idxUpperLevel = 2; local_expansion_t* target_local_expansion = &(virtual_local_expansions[idxUpperLevel-1]); const symbolic_data_t* target_symbolic = &(virtual_symbolics[idxUpperLevel-1]); - std::array<const multipole_t*, 342> source_multipoles {}; + std::array<const multipole_t*, 342> source_multipoles {}; std::array<const symbolic_data_t*, 342> source_symbolics {}; - int neighborPositions[342]; + int neighborPositions[342]{}; int counter = 0; + + // The cells are all the same, duplicate the central cell and + // create fake neighbour positions for(int idxX = -3 ; idxX <= 2 ; ++idxX){ for(int idxY = -3 ; idxY <= 2 ; ++idxY){ for(int idxZ = -3 ; idxZ <= 2 ; ++idxZ){ if( FMath::Abs(idxX) > 1 || FMath::Abs(idxY) > 1 || FMath::Abs(idxZ) > 1) { source_multipoles[counter] = &virtual_multipoles[idxUpperLevel-1]; - source_symbolics[counter] = &virtual_symbolics[idxUpperLevel-1]; - neighborPositions[counter] = neighIndex(idxX,idxY,idxZ); + source_symbolics[counter] = &virtual_symbolics[idxUpperLevel-1]; + neighborPositions[counter] = this->neighIndex(idxX,idxY,idxZ); ++counter; } } @@ -2487,14 +2583,17 @@ public: counter ); } + // note: the triple loop bounds are not the same than for the previous piece of code + // which handles the topmost virtual cell for(int idxUpperLevel = 3 ; idxUpperLevel <= offsetRealTree-1 ; ++idxUpperLevel) { + local_expansion_t* target_local_expansion = &(virtual_local_expansions[idxUpperLevel-1]); const symbolic_data_t* target_symbolic = &(virtual_symbolics[idxUpperLevel-1]); - std::array<const multipole_t*, 342> source_multipoles {}; + std::array<const multipole_t*, 342> source_multipoles {}; std::array<const symbolic_data_t*, 342> source_symbolics {}; int neighborPositions[342]; @@ -2504,7 +2603,7 @@ public: for(int idxZ = -2 ; idxZ <= 3 ; ++idxZ){ if( FMath::Abs(idxX) > 1 || FMath::Abs(idxY) > 1 || FMath::Abs(idxZ) > 1) { source_multipoles[counter] = &virtual_multipoles[idxUpperLevel-1]; - source_symbolics[counter] = &virtual_symbolics[idxUpperLevel-1]; + source_symbolics[counter] = &virtual_symbolics[idxUpperLevel-1]; neighborPositions[counter] = neighIndex(idxX,idxY,idxZ); ++counter; } @@ -2522,22 +2621,30 @@ public: ); } { + // M2L at the root level + int startIdx = -2, endIdx = 3 ; + if(rootLevel == 1) { // rootCell is the highest level + startIdx = -3, endIdx = 2 ; + } + local_expansion_t* target_local_expansion = &(rootCellFromProc.getLocalExpansionData()); const symbolic_data_t* target_symbolic = &(rootCellFromProc); +// std::cout << " >> M2L on rootCellFromProc "<< " level: " << target_symbolic->getLevel() +// << startIdx << " " << endIdx << std::endl; - std::array<const multipole_t*, 342> source_multipoles {}; + std::array<const multipole_t*, 342> source_multipoles {}; std::array<const symbolic_data_t*, 342> source_symbolics {}; int neighborPositions[342]; int counter = 0; - for(int idxX = -2 ; idxX <= 3 ; ++idxX) { - for(int idxY = -2 ; idxY <= 3 ; ++idxY) { - for(int idxZ = -2 ; idxZ <= 3 ; ++idxZ) { + for(int idxX = startIdx ; idxX <= endIdx ; ++idxX){ + for(int idxY = startIdx ; idxY <= endIdx ; ++idxY){ + for(int idxZ = startIdx ; idxZ <= endIdx ; ++idxZ){ if( FMath::Abs(idxX) > 1 || FMath::Abs(idxY) > 1 || FMath::Abs(idxZ) > 1) { source_multipoles[counter] = &(rootCellFromProc.getMultipoleData()); - source_symbolics[counter] = &rootCellFromProc; + source_symbolics[counter] = &rootCellFromProc; neighborPositions[counter] = neighIndex(idxX,idxY,idxZ); ++counter; } @@ -2554,20 +2661,28 @@ public: counter ); - } - + ///////////////////////////////////////////////////////////////// + /// + /// L2L operator + /// two steps + /// 1) Run the L2L for all levels except for the lowest virtual level + /// We only compute expansion for cell 0=(0,0,0) + /// 2) Run L2L on the true root level of tree (rootlevel in the full octree + /// We only compute expansion for cell 7=(1,1,1) + /// { std::array<local_expansion_t*, 8> child_local_expansions {}; std::array<const symbolic_data_t*, 8> child_symbolics {}; - for(int idxLevel = 2 ; idxLevel <= offsetRealTree-1-1 ; ++idxLevel){ + + for(int idxLevel = 2 ; idxLevel <= rootLevel-1 ; ++idxLevel){ const local_expansion_t* parent_local_expansion = &(virtual_local_expansions[idxLevel-1]); const symbolic_data_t* parent_symbolic = &(virtual_symbolics[idxLevel-1]); child_local_expansions[0] = &(virtual_local_expansions[idxLevel]); - child_symbolics[0] = &(virtual_symbolics[idxLevel]); + child_symbolics[0] = &(virtual_symbolics[idxLevel]); kernels[0]->L2L( parent_local_expansion, parent_symbolic, @@ -2576,35 +2691,36 @@ public: ); } } - { - const int idxLevel = offsetRealTree-1; + if(offsetRealTree >2){ // L2L from rootLevel+1 to level rootLevel + // const int idxLevel = offsetRealTree-1; const local_expansion_t* parent_local_expansion - = &(virtual_local_expansions[idxLevel-1]); + = &(virtual_local_expansions[rootLevel-1]); const symbolic_data_t* parent_symbolic - = &(virtual_symbolics[idxLevel-1]); - - std::array<local_expansion_t*, 8> child_local_expansions - { &(rootCellFromProc.getLocalExpansionData()) }; - std::array<const symbolic_data_t*, 8> child_symbolics - { &(rootCellFromProc) }; - + = &(virtual_symbolics[rootLevel-1]); + // only the 7 element of the array is initialized the ofther are + // set to the default value (nullptr) + std::array<local_expansion_t*, 8> child_local_expansions{}; + std::array<const symbolic_data_t*, 8> child_symbolics{} ; + // + child_local_expansions[7] = &(rootCellFromProc.getLocalExpansionData()) ; + child_symbolics[7] = &(rootCellFromProc) ; kernels[0]->L2L( parent_local_expansion, parent_symbolic, child_local_expansions.data(), child_symbolics.data() ); - } - // L2L from 0 to level 1 + } + // Run L2L from the lowest vitual level to the highest real tree level + // Done in the classical downardPass() } FLOG( FLog::Controller << "\tFinished (@Periodic = " << counterTime.tacAndElapsed() << " s)\n" ); } - }; -- GitLab