// @FUSE_BLAS // @FUSE_FFT // @FUSE_STARPU // @FUSE_MPI // Keep in private GIT #include #include #include #include //#define _VERBOSE_LEAF #define LOAD_FILE #include "Utils/FGlobal.hpp" #include "GroupTree/Core/FGroupTree.hpp" // Celui de develop #include "Components/FSimpleLeaf.hpp" // same as develop #include "Components/FSymbolicData.hpp" // same as develop #include "Containers/FVector.hpp" // same as develop #include "Utils/FMath.hpp" #include "Utils/FMemUtils.hpp" #include "Utils/FParameters.hpp" #include "Utils/FParameterNames.hpp" //#include "Files/FRandomLoader.hpp" #include "Files/FFmaGenericLoader.hpp" #include "Kernels/Interpolation/FInterpMatrixKernel.hpp" // same as develop #include "Kernels/Uniform/FUnifCell.hpp" // same as develop #include "Kernels/Uniform/FUnifKernel.hpp" // same as develop // #include "GroupTree/Core/FGroupTaskStarpuImplicitAlgorithm.hpp" // Celui de develop // #include "GroupTree/StarPUUtils/FStarPUKernelCapacities.hpp" // same as develop #include "GroupTree/StarPUUtils/FStarPUCpuWrapper.hpp" // same as develop #include "GroupTree/Core/FGroupTools.hpp" // only check leaves and cells // #include "GroupTree/Core/FP2PGroupParticleContainer.hpp" // same as develop #include "Kernels/P2P/FP2PParticleContainer.hpp" // same as develop #include "Utils/FLeafBalance.hpp" // // #include "Core/FFmmAlgorithm.hpp" #include "GroupTree/Core/FGroupTools.hpp" // Initialize the types typedef double FReal; static const int ORDER = 6; typedef FInterpMatrixKernelR MatrixKernelClass; using GroupCellClass = FUnifCell; using GroupCellUpClass = typename GroupCellClass::multipole_t; using GroupCellDownClass = typename GroupCellClass::local_expansion_t; using GroupCellSymbClass = FSymbolicData; typedef FP2PGroupParticleContainer GroupContainerClass; typedef FGroupTree< FReal, GroupCellSymbClass, GroupCellUpClass, GroupCellDownClass, GroupContainerClass, 1, 4, FReal> GroupOctreeClass; typedef FStarPUAllCpuCapacities> GroupKernelClass; typedef FStarPUCpuWrapper GroupCpuWrapper; typedef FGroupTaskStarPUImplicitAlgorithm GroupAlgorithm; #ifndef LOAD_FILE typedef FRandomLoader LoaderClass; #else typedef FFmaGenericLoader LoaderClass; #endif void sortParticle(FPoint * allParticlesToSort, int treeHeight, int groupSize, std::vector> & sizeForEachGroup, std::vector & distributedMortonIndex, LoaderClass& loader, int nproc); void createNodeRepartition(std::vector distributedMortonIndex, std::vector>>& nodeRepartition, int nproc, int treeHeight); FSize getNbParticlesPerNode(FSize mpi_count, FSize mpi_rank, FSize total); int main(int argc, char* argv[]){ const FParameterNames LocalOptionBlocSize { {"-bs"}, "The size of the block of the blocked tree" }; const FParameterNames LocalOptionNoValidate { {"-no-validation"}, "To avoid comparing with direct computation"}; FHelpDescribeAndExit(argc, argv, "Loutre", FParameterDefinitions::OctreeHeight, FParameterDefinitions::NbParticles, FParameterDefinitions::OctreeSubHeight, FParameterDefinitions::InputFile, LocalOptionBlocSize, LocalOptionNoValidate); // Get params const int NbLevels = FParameters::getValue(argc,argv,FParameterDefinitions::OctreeHeight.options, 5); const int groupSize = FParameters::getValue(argc,argv,LocalOptionBlocSize.options, 8); #ifndef STARPU_USE_MPI std::cout << "Pas de mpi -_-\" " << std::endl; #endif int mpi_rank, nproc; FMpi mpiComm(argc,argv); mpi_rank = mpiComm.global().processId(); nproc = mpiComm.global().processCount(); #ifndef LOAD_FILE std::cout << " RANDOM LOADER" << std::endl; const FSize NbParticles = FParameters::getValue(argc,argv,FParameterDefinitions::NbParticles.options, FSize(10000)); #else // Load the particles const char* const filename = FParameters::getStr(argc,argv,FParameterDefinitions::InputFile.options, "../Data/test20k.fma"); std::cout << " FMA LOADER read from " << filename << std::endl; LoaderClass loader(filename); FAssertLF(loader.isOpen()); const FSize NbParticles = loader.getNumberOfParticles(); #endif FPoint * allParticlesToSort = new FPoint[NbParticles]; //Fill particles #ifndef LOAD_FILE { FSize idxPart = 0; for(int i = 0; i < mpiComm.global().processCount(); ++i){ FSize NbParticlesPerNode = getNbParticlesPerNode(nproc, i, NbParticles); LoaderClass loader(NbParticlesPerNode, 1.0, FPoint(0,0,0), i); FAssertLF(loader.isOpen()); for(FSize j= 0 ; j < NbParticlesPerNode ; ++j){ loader.fillParticle(&allParticlesToSort[idxPart]);//Same with file or not ++idxPart; } } } LoaderClass loader(NbParticles, 1.0, FPoint(0,0,0)); #else for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ FReal physicalValue = 0.1; loader.fillParticle(&allParticlesToSort[idxPart], &physicalValue);//Same with file or not } #endif std::vector distributedMortonIndex; std::vector> sizeForEachGroup; sortParticle(allParticlesToSort, NbLevels, groupSize, sizeForEachGroup, distributedMortonIndex, loader, nproc); FP2PParticleContainer allParticles; for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ FReal physicalValue = 0.1; allParticles.push(allParticlesToSort[idxPart], physicalValue); } // Put the data into the tree std::cout << std::endl<< " Group size at the leaf level " << std::endl ; int totalLeaves = 0 ; for ( std::size_t idLevel=2; idLevel< sizeForEachGroup.size() ; ++idLevel){ std::cout << " Group size at level " << idLevel << std::endl ; totalLeaves = 0 ; for ( auto v : sizeForEachGroup[idLevel]){ totalLeaves += v; std::cout << " " << v ; } std::cout << std::endl ;std::cout << " Total number of leaves: " < ContainerClass; typedef FSimpleLeaf LeafClass; typedef FUnifCell CellClass; typedef FOctree OctreeClass; typedef FUnifKernel KernelClass; typedef FFmmAlgorithm FmmClass; const FReal epsilon = 1E-10; OctreeClass treeCheck(NbLevels, 2,loader.getBoxWidth(),loader.getCenterOfBox()); for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ // put in tree treeCheck.insert(allParticlesToSort[idxPart], 0.1); } MatrixKernelClass MatrixKernelValidation; KernelClass kernels(NbLevels, loader.getBoxWidth(), loader.getCenterOfBox(), &MatrixKernelValidation); FmmClass algorithm(&treeCheck, &kernels); algorithm.execute(operationsToProceed); std::string fileName("output-Block-") ; fileName += std::to_string(nproc) + "-" + std::to_string(mpi_rank) + ".fma" ; groupTree::saveSolutionInFile(fileName, loader.getNumberOfParticles(), treeCheck) ; groupTree::checkCellTree(groupedTree, groupalgo, treeCheck, epsilon) ; groupTree::checkLeaves(groupedTree, groupalgo, treeCheck, epsilon) ; std::cout << "Comparing is over" << std::endl; } std::cout << "Done" << std::endl; return 0; } void sortParticle(FPoint * allParticles, int treeHeight, int groupSize, std::vector> & sizeForEachGroup, std::vector & distributedMortonIndex, LoaderClass& loader, int nproc) { //Structure pour trier struct ParticleSortingStruct{ FPoint position; MortonIndex mindex; }; // Création d'un tableau de la structure pour trier puis remplissage du tableau const FSize nbParticles = loader.getNumberOfParticles(); ParticleSortingStruct* particlesToSort = new ParticleSortingStruct[nbParticles]; for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart){ const FTreeCoordinate host = FCoordinateComputer::GetCoordinateFromPosition(loader.getCenterOfBox(), loader.getBoxWidth(), treeHeight, allParticles[idxPart]); const MortonIndex particleIndex = host.getMortonIndex(); particlesToSort[idxPart].mindex = particleIndex; particlesToSort[idxPart].position = allParticles[idxPart]; } //Trie du nouveau tableau FQuickSort::QsOmp(particlesToSort, nbParticles, [](const ParticleSortingStruct& v1, const ParticleSortingStruct& v2){ return v1.mindex <= v2.mindex; }); //Replace tout dans l'ordre dans le tableau d'origine for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart){ allParticles[idxPart] = particlesToSort[idxPart].position; } //Compte le nombre de feuilles sizeForEachGroup.resize(treeHeight); MortonIndex previousLeaf = -1; int numberOfLeaf = 0; for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart) { if(particlesToSort[idxPart].mindex != previousLeaf) { previousLeaf = particlesToSort[idxPart].mindex; ++numberOfLeaf; } } //Calcul de la taille des groupes au niveau des feuilles FLeafBalance balancer; for(int processId = 0; processId < nproc; ++processId) { FSize size_last; FSize countGroup; FSize leafOnProcess = balancer.getRight(numberOfLeaf, nproc, processId) - balancer.getLeft(numberOfLeaf, nproc, processId); size_last = leafOnProcess%groupSize; countGroup = (leafOnProcess - size_last)/groupSize; for(int i = 0; i < countGroup; ++i) sizeForEachGroup[treeHeight-1].push_back(groupSize); if(size_last > 0) sizeForEachGroup[treeHeight-1].push_back((int)size_last); } //Calcul du working interval au niveau des feuilles previousLeaf = -1; int countLeaf = 0; int processId = 0; FSize leafOnProcess = balancer.getRight(numberOfLeaf, nproc, 0) - balancer.getLeft(numberOfLeaf, nproc, 0); distributedMortonIndex.push_back(previousLeaf); for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart) { if(particlesToSort[idxPart].mindex != previousLeaf) { previousLeaf = particlesToSort[idxPart].mindex; ++countLeaf; if(countLeaf == leafOnProcess) { distributedMortonIndex.push_back(previousLeaf); distributedMortonIndex.push_back(previousLeaf); countLeaf = 0; ++processId; leafOnProcess = balancer.getRight(numberOfLeaf, nproc, processId) - balancer.getLeft(numberOfLeaf, nproc, processId); } } } distributedMortonIndex.push_back(particlesToSort[nbParticles - 1].mindex); std::cout << "Morton distribution to build the duplicated tree " <>> nodeRepartition; createNodeRepartition(distributedMortonIndex, nodeRepartition, nproc, treeHeight); for ( std::size_t idLevel=0; idLevel< nodeRepartition.size() ; ++idLevel){ std::cout << " nodeRepartition at level " << idLevel << std::endl ; for ( std::size_t procID=0 ; procID< nodeRepartition[idLevel].size(); ++procID){ std::cout << " n proc( " << procID << " ) " << " [ " << nodeRepartition[idLevel][procID][0] << ", " << nodeRepartition[idLevel][procID][1] <<" ]" <= 0; --idxLevel) { processId = 0; int countParticleInTheGroup = 0; MortonIndex previousMortonCell = -1; //cout << "Compute Level " << idxLevel << endl; for(int idxPart = 0; idxPart < nbParticles; ++idxPart) { MortonIndex mortonCell = (particlesToSort[idxPart].mindex) >> (3*(treeHeight - 1 - idxLevel)); if(mortonCell <= nodeRepartition[idxLevel][processId][1]) //Si l'indice est dans le working interval { if(mortonCell != previousMortonCell) //Si c'est un nouvelle indice { ++countParticleInTheGroup; //On le compte dans le groupe previousMortonCell = mortonCell; if(countParticleInTheGroup == groupSize) //Si le groupe est plein on ajoute le compte { sizeForEachGroup[idxLevel].push_back(groupSize); countParticleInTheGroup = 0; } } } else //Si l'on change d'interval de process on ajoute ce que l'on a compté { if(countParticleInTheGroup > 0) sizeForEachGroup[idxLevel].push_back(countParticleInTheGroup); countParticleInTheGroup = 1; previousMortonCell = mortonCell; ++processId; } } if(countParticleInTheGroup > 0) sizeForEachGroup[idxLevel].push_back(countParticleInTheGroup); } // Print group size per level std::cout << std::endl<< " Group size at the leaf level " << std::endl ; int totalLeaves = 0 ; for ( std::size_t idLevel=2; idLevel< sizeForEachGroup.size() ; ++idLevel){ std::cout << " Group size at level " << idLevel << std::endl ; totalLeaves = 0 ; for ( auto v : sizeForEachGroup[idLevel]){ totalLeaves += v; std::cout << " " << v ; } std::cout << std::endl ;std::cout << " Total number of leaves: " < distributedMortonIndex, std::vector>>& nodeRepartition, int nproc, int treeHeight) { nodeRepartition.resize(treeHeight, std::vector>(nproc, std::vector(2))); for(int node_id = 0; node_id < nproc; ++node_id){ nodeRepartition[treeHeight-1][node_id][0] = distributedMortonIndex[node_id*2]; nodeRepartition[treeHeight-1][node_id][1] = distributedMortonIndex[node_id*2+1]; } for(int idxLevel = treeHeight - 2; idxLevel >= 0 ; --idxLevel){ nodeRepartition[idxLevel][0][0] = nodeRepartition[idxLevel+1][0][0] >> 3; nodeRepartition[idxLevel][0][1] = nodeRepartition[idxLevel+1][0][1] >> 3; for(int node_id = 1; node_id < nproc; ++node_id){ nodeRepartition[idxLevel][node_id][0] = FMath::Max(nodeRepartition[idxLevel+1][node_id][0] >> 3, nodeRepartition[idxLevel][node_id-1][0]+1); //Berenger phd :) nodeRepartition[idxLevel][node_id][1] = nodeRepartition[idxLevel+1][node_id][1] >> 3; } } } FSize getNbParticlesPerNode(FSize mpi_count, FSize mpi_rank, FSize total){ if(mpi_rank < (total%mpi_count)) return ((total - (total%mpi_count))/mpi_count)+1; return ((total - (total%mpi_count))/mpi_count); }