diff --git a/Src/GroupTree/Uniform/FUnifCellPOD.hpp b/Src/GroupTree/Uniform/FUnifCellPOD.hpp new file mode 100644 index 0000000000000000000000000000000000000000..9f2d046f3ac7d2302130014870bc4282f5c782f1 --- /dev/null +++ b/Src/GroupTree/Uniform/FUnifCellPOD.hpp @@ -0,0 +1,136 @@ +// @SCALFMM_PRIVATE +#ifndef FUNIFCELLPOD_HPP +#define FUNIFCELLPOD_HPP + +#include "../../Utils/FGlobal.hpp" +#include "../../Containers/FTreeCoordinate.hpp" +#include "../StarPUUtils/FStarPUDefaultAlign.hpp" +#include "../../Kernels/Uniform//FUnifTensor.hpp" +#include "../../Utils/FComplex.hpp" + +struct alignas(FStarPUDefaultAlign::StructAlign) FUnifCellPODCore { + MortonIndex mortonIndex; + int coordinates[3]; +}; + +template <int ORDER, int NRHS = 1, int NLHS = 1, int NVALS = 1> +struct alignas(FStarPUDefaultAlign::StructAlign) FUnifCellPODPole { + static const int VectorSize = TensorTraits<ORDER>::nnodes; + static const int TransformedVectorSize = (2*ORDER-1)*(2*ORDER-1)*(2*ORDER-1); + FReal multipole_exp[NRHS * NVALS * VectorSize]; //< Multipole expansion + FComplex transformed_multipole_exp[NRHS * NVALS * TransformedVectorSize]; +}; + +template <int ORDER, int NRHS = 1, int NLHS = 1, int NVALS = 1> +struct alignas(FStarPUDefaultAlign::StructAlign) FUnifCellPODLocal { + static const int VectorSize = TensorTraits<ORDER>::nnodes; + static const int TransformedVectorSize = (2*ORDER-1)*(2*ORDER-1)*(2*ORDER-1); + FComplex transformed_local_exp[NLHS * NVALS * TransformedVectorSize]; + FReal local_exp[NLHS * NVALS * VectorSize]; //< Local expansion +}; + +template <int ORDER, int NRHS = 1, int NLHS = 1, int NVALS = 1> +class FUnifCellPOD { + FUnifCellPODCore* symb; + FUnifCellPODPole<ORDER,NRHS,NLHS,NVALS>* up; + FUnifCellPODLocal<ORDER,NRHS,NLHS,NVALS>* down; + +public: + FUnifCellPOD(FUnifCellPODCore* inSymb, FUnifCellPODPole<ORDER,NRHS,NLHS,NVALS>* inUp, + FUnifCellPODLocal<ORDER,NRHS,NLHS,NVALS>* inDown): symb(inSymb), up(inUp), down(inDown){ + } + + FUnifCellPOD() + : symb(nullptr), up(nullptr), down(nullptr){ + } + + + /** To get the morton index */ + MortonIndex getMortonIndex() const { + return symb->mortonIndex; + } + + /** To set the morton index */ + void setMortonIndex(const MortonIndex inMortonIndex) { + symb->mortonIndex = inMortonIndex; + } + + /** To get the position */ + FTreeCoordinate getCoordinate() const { + return FTreeCoordinate(symb->coordinates[0], + symb->coordinates[1], symb->coordinates[2]); + } + + /** To set the position */ + void setCoordinate(const FTreeCoordinate& inCoordinate) { + symb->coordinates[0] = inCoordinate.getX(); + symb->coordinates[1] = inCoordinate.getY(); + symb->coordinates[2] = inCoordinate.getZ(); + } + + /** To set the position from 3 FReals */ + void setCoordinate(const int inX, const int inY, const int inZ) { + symb->coordinates[0] = inX; + symb->coordinates[1] = inY; + symb->coordinates[2] = inZ; + } + + /** Get Multipole */ + const FReal* getMultipole(const int inRhs) const + { return up->multipole_exp + inRhs*up->VectorSize; + } + /** Get Local */ + const FReal* getLocal(const int inRhs) const{ + return down->local_exp + inRhs*down->VectorSize; + } + + /** Get Multipole */ + FReal* getMultipole(const int inRhs){ + return up->multipole_exp + inRhs*up->VectorSize; + } + /** Get Local */ + FReal* getLocal(const int inRhs){ + return down->local_exp + inRhs*down->VectorSize; + } + + /** To get the leading dim of a vec */ + int getVectorSize() const{ + return down->VectorSize; + } + + /** Get Transformed Multipole */ + const FComplex* getTransformedMultipole(const int inRhs) const{ + return up->transformed_multipole_exp + inRhs*up->TransformedVectorSize; + } + /** Get Transformed Local */ + const FComplex* getTransformedLocal(const int inRhs) const{ + return down->transformed_local_exp + inRhs*down->TransformedVectorSize; + } + + /** Get Transformed Multipole */ + FComplex* getTransformedMultipole(const int inRhs){ + return up->transformed_multipole_exp + inRhs*up->TransformedVectorSize; + } + /** Get Transformed Local */ + FComplex* getTransformedLocal(const int inRhs){ + return down->transformed_local_exp + inRhs*down->TransformedVectorSize; + } + + /** To get the leading dim of a vec */ + int getTransformedVectorSize() const{ + return down->TransformedVectorSize; + } + + /** Make it like the begining */ + void resetToInitialState(){ + memset(up->multipole_exp, 0, sizeof(FReal) * NRHS * NVALS * up->VectorSize); + memset(down->local_exp, 0, sizeof(FReal) * NLHS * NVALS * down->VectorSize); + memset(up->transformed_multipole_exp, 0, + sizeof(FComplex) * NRHS * NVALS * up->TransformedVectorSize); + memset(down->transformed_local_exp, 0, + sizeof(FComplex) * NLHS * NVALS * down->TransformedVectorSize); + } +}; + +#endif // FUNIFCELLPOD_HPP + diff --git a/Tests/noDist/testBlockedUniform.cpp b/Tests/noDist/testBlockedUniform.cpp new file mode 100644 index 0000000000000000000000000000000000000000..46eb5618bb4f63e4f440c8dd4d6d9ff368571f28 --- /dev/null +++ b/Tests/noDist/testBlockedUniform.cpp @@ -0,0 +1,201 @@ + +// ==== CMAKE ===== +// @FUSE_BLAS +// @FUSE_FFT +// ================ +// Keep in private GIT +// @SCALFMM_PRIVATE + + +#include "../../Src/Utils/FGlobal.hpp" + +#include "../../Src/GroupTree/Core/FGroupTree.hpp" + +#include "../../Src/Components/FSimpleLeaf.hpp" +#include "../../Src/Containers/FVector.hpp" + +#include "../../Src/Kernels/P2P/FP2PParticleContainer.hpp" + +#include "Kernels/Interpolation/FInterpMatrixKernel.hpp" +#include "../../Src/Kernels/Uniform/FUnifKernel.hpp" + +#include "../../Src/GroupTree/Uniform/FUnifCellPOD.hpp" + +#include "../../Src/Utils/FMath.hpp" +#include "../../Src/Utils/FMemUtils.hpp" +#include "../../Src/Utils/FParameters.hpp" + +#include "../../Src/Files/FRandomLoader.hpp" +#include "../../Src/Files/FFmaGenericLoader.hpp" + +#include "../../Src/GroupTree/Core/FGroupSeqAlgorithm.hpp" +#include "../../Src/GroupTree/Core/FGroupTaskAlgorithm.hpp" +#ifdef ScalFMM_USE_OMP4 +#include "../../Src/GroupTree/Core/FGroupTaskDepAlgorithm.hpp" +#endif +#ifdef ScalFMM_USE_STARPU +#include "../../Src/GroupTree/Core/FGroupTaskStarpuAlgorithm.hpp" +#include "../../Src/GroupTree/StarPUUtils/FStarPUKernelCapacities.hpp" +#endif +#include "../../Src/GroupTree/Core/FP2PGroupParticleContainer.hpp" + +#include "../../Src/Utils/FParameterNames.hpp" + +#include <memory> + + +//#define RANDOM_PARTICLES + +int main(int argc, char* argv[]){ + const FParameterNames LocalOptionBlocSize { {"-bs"}, "The size of the block of the blocked tree"}; + const FParameterNames LocalOptionNoValidate { {"-no-validation"}, "To avoid comparing with direct computation"}; + FHelpDescribeAndExit(argc, argv, "Test the blocked tree by counting the particles.", + FParameterDefinitions::OctreeHeight, +#ifdef RANDOM_PARTICLES + FParameterDefinitions::NbParticles, +#else + FParameterDefinitions::InputFile, +#endif + FParameterDefinitions::NbThreads, + LocalOptionBlocSize, LocalOptionNoValidate); + + // Initialize the types + static const int ORDER = 6; + typedef FInterpMatrixKernelR MatrixKernelClass; + + typedef FUnifCellPODCore GroupCellSymbClass; + typedef FUnifCellPODPole<ORDER> GroupCellUpClass; + typedef FUnifCellPODLocal<ORDER> GroupCellDownClass; + typedef FUnifCellPOD<ORDER> GroupCellClass; + + typedef FP2PGroupParticleContainer<> GroupContainerClass; + typedef FGroupTree< GroupCellClass, GroupCellSymbClass, GroupCellUpClass, GroupCellDownClass, GroupContainerClass, 1, 4, FReal> GroupOctreeClass; +#ifdef ScalFMM_USE_STARPU + typedef FStarPUAllYesCapacities<FUnifKernel<GroupCellClass,GroupContainerClass,MatrixKernelClass,ORDER>> GroupKernelClass; + typedef FStarPUCpuWrapper<typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass> GroupCpuWrapper; + typedef FGroupTaskStarPUAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupCpuWrapper > GroupAlgorithm; +#elif defined(ScalFMM_USE_OMP4) + typedef FUnifKernel<GroupCellClass,GroupContainerClass,MatrixKernelClass,ORDER> GroupKernelClass; + // Set the number of threads + omp_set_num_threads(FParameters::getValue(argc,argv,FParameterDefinitions::NbThreads.options, omp_get_max_threads())); + typedef FGroupTaskDepAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm; +#else + typedef FUnifKernel<GroupCellClass,GroupContainerClass,MatrixKernelClass,ORDER> GroupKernelClass; + //typedef FGroupSeqAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm; + typedef FGroupTaskAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm; +#endif + // Get params + const int NbLevels = FParameters::getValue(argc,argv,FParameterDefinitions::OctreeHeight.options, 5); + const int groupSize = FParameters::getValue(argc,argv,LocalOptionBlocSize.options, 250); + + // Load the particles +#ifdef RANDOM_PARTICLES + FRandomLoader loader(FParameters::getValue(argc,argv,FParameterDefinitions::NbParticles.options, 2000), 1.0, FPoint(0,0,0), 0); +#else + const char* const filename = FParameters::getStr(argc,argv,FParameterDefinitions::InputFile.options, "../Data/test20k.fma"); + FFmaGenericLoader loader(filename); +#endif + FAssertLF(loader.isOpen()); + FTic timer; + + FP2PParticleContainer<> allParticles; + for(int idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ + FPoint particlePosition; + FReal physicalValue; +#ifdef RANDOM_PARTICLES + physicalValue = 0.10; + loader.fillParticle(&particlePosition); +#else + loader.fillParticle(&particlePosition, &physicalValue); +#endif + allParticles.push(particlePosition, physicalValue); + } + std::cout << "Particles loaded in " << timer.tacAndElapsed() << "s\n"; + + // Put the data into the tree + timer.tic(); + GroupOctreeClass groupedTree(NbLevels, loader.getBoxWidth(), loader.getCenterOfBox(), groupSize, &allParticles); + groupedTree.printInfoBlocks(); + std::cout << "Tree created in " << timer.tacAndElapsed() << "s\n"; + + // Run the algorithm + const MatrixKernelClass MatrixKernel; + GroupKernelClass groupkernel(NbLevels, loader.getBoxWidth(), loader.getCenterOfBox(), &MatrixKernel); + GroupAlgorithm groupalgo(&groupedTree,&groupkernel); + + timer.tic(); + groupalgo.execute(); + std::cout << "Kernel executed in in " << timer.tacAndElapsed() << "s\n"; + + // Validate the result + if(FParameters::existParameter(argc, argv, LocalOptionNoValidate.options) == false){ + int offsetParticles = 0; + FReal*const allPhysicalValues = allParticles.getPhysicalValues(); + FReal*const allPosX = const_cast<FReal*>( allParticles.getPositions()[0]); + FReal*const allPosY = const_cast<FReal*>( allParticles.getPositions()[1]); + FReal*const allPosZ = const_cast<FReal*>( allParticles.getPositions()[2]); + + groupedTree.forEachCellLeaf<FP2PGroupParticleContainer<> >([&](GroupCellClass cellTarget, FP2PGroupParticleContainer<> * leafTarget){ + const FReal*const physicalValues = leafTarget->getPhysicalValues(); + const FReal*const posX = leafTarget->getPositions()[0]; + const FReal*const posY = leafTarget->getPositions()[1]; + const FReal*const posZ = leafTarget->getPositions()[2]; + const int nbPartsInLeafTarget = leafTarget->getNbParticles(); + + for(int idxPart = 0 ; idxPart < nbPartsInLeafTarget ; ++idxPart){ + allPhysicalValues[offsetParticles + idxPart] = physicalValues[idxPart]; + allPosX[offsetParticles + idxPart] = posX[idxPart]; + allPosY[offsetParticles + idxPart] = posY[idxPart]; + allPosZ[offsetParticles + idxPart] = posZ[idxPart]; + } + + offsetParticles += nbPartsInLeafTarget; + }); + + FAssertLF(offsetParticles == loader.getNumberOfParticles()); + + FReal*const allDirectPotentials = allParticles.getPotentials(); + FReal*const allDirectforcesX = allParticles.getForcesX(); + FReal*const allDirectforcesY = allParticles.getForcesY(); + FReal*const allDirectforcesZ = allParticles.getForcesZ(); + + for(int idxTgt = 0 ; idxTgt < offsetParticles ; ++idxTgt){ + for(int idxMutual = idxTgt + 1 ; idxMutual < offsetParticles ; ++idxMutual){ + FP2PR::MutualParticles( + allPosX[idxTgt],allPosY[idxTgt],allPosZ[idxTgt], allPhysicalValues[idxTgt], + &allDirectforcesX[idxTgt], &allDirectforcesY[idxTgt], &allDirectforcesZ[idxTgt], &allDirectPotentials[idxTgt], + allPosX[idxMutual],allPosY[idxMutual],allPosZ[idxMutual], allPhysicalValues[idxMutual], + &allDirectforcesX[idxMutual], &allDirectforcesY[idxMutual], &allDirectforcesZ[idxMutual], &allDirectPotentials[idxMutual] + ); + } + } + + FMath::FAccurater potentialDiff; + FMath::FAccurater fx, fy, fz; + offsetParticles = 0; + groupedTree.forEachCellLeaf<FP2PGroupParticleContainer<> >([&](GroupCellClass cellTarget, FP2PGroupParticleContainer<> * leafTarget){ + const FReal*const potentials = leafTarget->getPotentials(); + const FReal*const forcesX = leafTarget->getForcesX(); + const FReal*const forcesY = leafTarget->getForcesY(); + const FReal*const forcesZ = leafTarget->getForcesZ(); + const int nbPartsInLeafTarget = leafTarget->getNbParticles(); + + for(int idxTgt = 0 ; idxTgt < nbPartsInLeafTarget ; ++idxTgt){ + potentialDiff.add(allDirectPotentials[idxTgt + offsetParticles], potentials[idxTgt]); + fx.add(allDirectforcesX[idxTgt + offsetParticles], forcesX[idxTgt]); + fy.add(allDirectforcesY[idxTgt + offsetParticles], forcesY[idxTgt]); + fz.add(allDirectforcesZ[idxTgt + offsetParticles], forcesZ[idxTgt]); + } + + offsetParticles += nbPartsInLeafTarget; + }); + + std::cout << "Error : Potential " << potentialDiff << "\n"; + std::cout << "Error : fx " << fx << "\n"; + std::cout << "Error : fy " << fy << "\n"; + std::cout << "Error : fz " << fz << "\n"; + } + + return 0; +} +