Commit 9008d211 authored by BRAMAS Berenger's avatar BRAMAS Berenger

Add blocked version of the taylor kernel

parent 730fa5b0
#ifndef FTAYLORCELLPOD_HPP
#define FTAYLORCELLPOD_HPP
#include "../../Utils/FGlobal.hpp"
#include "../../Containers/FTreeCoordinate.hpp"
#include "../StarPUUtils/FStarPUDefaultAlign.hpp"
struct alignas(FStarPUDefaultAlign::StructAlign) FTaylorCellPODCore {
MortonIndex mortonIndex;
int coordinates[3];
};
template <int P, int order>
struct alignas(FStarPUDefaultAlign::StructAlign) FTaylorCellPODPole {
//Size of Multipole Vector
static const int MultipoleSize = ((P+1)*(P+2)*(P+3))*order/6;
//Multipole vector
FReal multipole_exp[MultipoleSize];
};
template <int P, int order>
struct alignas(FStarPUDefaultAlign::StructAlign) FTaylorCellPODLocal {
//Size of Local Vector
static const int LocalSize = ((P+1)*(P+2)*(P+3))*order/6;
//Local vector
FReal local_exp[LocalSize];
};
template <int P, int order>
class FTaylorCellPOD
{
FTaylorCellPODCore* symb;
FTaylorCellPODPole<P,order>* up;
FTaylorCellPODLocal<P,order>* down;
public:
FTaylorCellPOD(FTaylorCellPODCore* inSymb, FTaylorCellPODPole<P,order>* inUp,
FTaylorCellPODLocal<P,order>* inDown): symb(inSymb), up(inUp), down(inDown){
}
FTaylorCellPOD()
: symb(nullptr), up(nullptr), down(nullptr){
}
/** To get the morton index */
MortonIndex getMortonIndex() const {
return symb->mortonIndex;
}
/** To set the morton index */
void setMortonIndex(const MortonIndex inMortonIndex) {
symb->mortonIndex = inMortonIndex;
}
/** To get the position */
FTreeCoordinate getCoordinate() const {
return FTreeCoordinate(symb->coordinates[0],
symb->coordinates[1], symb->coordinates[2]);
}
/** To set the position */
void setCoordinate(const FTreeCoordinate& inCoordinate) {
symb->coordinates[0] = inCoordinate.getX();
symb->coordinates[1] = inCoordinate.getY();
symb->coordinates[2] = inCoordinate.getZ();
}
/** To set the position from 3 FReals */
void setCoordinate(const int inX, const int inY, const int inZ) {
symb->coordinates[0] = inX;
symb->coordinates[1] = inY;
symb->coordinates[2] = inZ;
}
/** Get Multipole */
const FReal* getMultipole() const
{ return up->multipole_exp;
}
/** Get Local */
const FReal* getLocal() const{
return down->local_exp;
}
/** Get Multipole */
FReal* getMultipole(){
return up->multipole_exp;
}
/** Get Local */
FReal* getLocal(){
return down->local_exp;
}
/** To get the leading dim of a vec */
int getVectorSize() const{
return down->LocalSize;
}
/** Make it like the begining */
void resetToInitialState(){
memset(up->multipole_exp, 0, sizeof(FReal) * up->MultipoleSize);
memset(down->local_exp, 0, sizeof(FReal) * down->LocalSize);
}
};
#endif // FTAYLORCELLPOD_HPP
// Keep in private GIT
// @SCALFMM_PRIVATE
#include "../../Src/Utils/FGlobal.hpp"
#include "../../Src/GroupTree/Core/FGroupTree.hpp"
#include "../../Src/Components/FSimpleLeaf.hpp"
#include "../../Src/Containers/FVector.hpp"
#include "../../Src/Kernels/P2P/FP2PParticleContainer.hpp"
#include "../../Src/Kernels/Taylor/FTaylorKernel.hpp"
#include "../../Src/GroupTree/Taylor/FTaylorCellPOD.hpp"
#include "../../Src/Utils/FMath.hpp"
#include "../../Src/Utils/FMemUtils.hpp"
#include "../../Src/Utils/FParameters.hpp"
#include "../../Src/Files/FRandomLoader.hpp"
#include "../../Src/Files/FFmaGenericLoader.hpp"
#include "../../Src/GroupTree/Core/FGroupSeqAlgorithm.hpp"
#include "../../Src/GroupTree/Core/FGroupTaskAlgorithm.hpp"
#ifdef ScalFMM_USE_OMP4
#include "../../Src/GroupTree/Core/FGroupTaskDepAlgorithm.hpp"
#endif
#ifdef ScalFMM_USE_STARPU
#include "../../Src/GroupTree/Core/FGroupTaskStarpuAlgorithm.hpp"
#include "../../Src/GroupTree/StarPUUtils/FStarPUKernelCapacities.hpp"
#endif
#include "../../Src/GroupTree/Core/FP2PGroupParticleContainer.hpp"
#include "../../Src/Utils/FParameterNames.hpp"
#include <memory>
int main(int argc, char* argv[]){
const FParameterNames LocalOptionBlocSize { {"-bs"}, "The size of the block of the blocked tree"};
const FParameterNames LocalOptionNoValidate { {"-no-validation"}, "To avoid comparing with direct computation"};
FHelpDescribeAndExit(argc, argv, "Test the blocked tree by counting the particles.",
FParameterDefinitions::OctreeHeight,FParameterDefinitions::InputFile,
FParameterDefinitions::NbThreads,
FParameterDefinitions::NbParticles, LocalOptionBlocSize, LocalOptionNoValidate);
// Initialize the types
static const int P = 9;
typedef FTaylorCellPODCore GroupCellSymbClass;
typedef FTaylorCellPODPole<P,1> GroupCellUpClass;
typedef FTaylorCellPODLocal<P,1> GroupCellDownClass;
typedef FTaylorCellPOD<P,1> GroupCellClass;
typedef FP2PGroupParticleContainer<> GroupContainerClass;
typedef FGroupTree< GroupCellClass, GroupCellSymbClass, GroupCellUpClass, GroupCellDownClass, GroupContainerClass, 1, 4, FReal> GroupOctreeClass;
#ifdef ScalFMM_USE_STARPU
typedef FStarPUAllYesCapacities<FTaylorKernel< GroupCellClass, GroupContainerClass , P,1>> GroupKernelClass;
typedef FStarPUCpuWrapper<typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass> GroupCpuWrapper;
typedef FGroupTaskStarPUAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupCpuWrapper > GroupAlgorithm;
#elif defined(ScalFMM_USE_OMP4)
typedef FTaylorKernel< GroupCellClass, GroupContainerClass , P,1> GroupKernelClass;
// Set the number of threads
omp_set_num_threads(FParameters::getValue(argc,argv,FParameterDefinitions::NbThreads.options, omp_get_max_threads()));
typedef FGroupTaskDepAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
#else
typedef FTaylorKernel< GroupCellClass, GroupContainerClass , P,1> GroupKernelClass;
//typedef FGroupSeqAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
typedef FGroupTaskAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
#endif
// Get params
const int NbLevels = FParameters::getValue(argc,argv,FParameterDefinitions::OctreeHeight.options, 5);
const int groupSize = FParameters::getValue(argc,argv,LocalOptionBlocSize.options, 250);
const char* const filename = FParameters::getStr(argc,argv,FParameterDefinitions::InputFile.options, "../Data/test20k.fma");
// Load the particles
//FRandomLoader loader(FParameters::getValue(argc,argv,FParameterDefinitions::NbParticles.options, 20), 1.0, FPoint(0,0,0), 0);
FFmaGenericLoader loader(filename);
FAssertLF(loader.isOpen());
FTic timer;
FP2PParticleContainer<> allParticles;
for(int idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
FReal physicalValue;
FPoint particlePosition;
loader.fillParticle(&particlePosition, &physicalValue);
allParticles.push(particlePosition, physicalValue);
}
std::cout << "Particles loaded in " << timer.tacAndElapsed() << "s\n";
// Put the data into the tree
timer.tic();
GroupOctreeClass groupedTree(NbLevels, loader.getBoxWidth(), loader.getCenterOfBox(), groupSize, &allParticles);
groupedTree.printInfoBlocks();
std::cout << "Tree created in " << timer.tacAndElapsed() << "s\n";
// Run the algorithm
GroupKernelClass groupkernel(NbLevels, loader.getBoxWidth(), loader.getCenterOfBox());
GroupAlgorithm groupalgo(&groupedTree,&groupkernel);
timer.tic();
groupalgo.execute();
std::cout << "Kernel executed in in " << timer.tacAndElapsed() << "s\n";
// Validate the result
if(FParameters::existParameter(argc, argv, LocalOptionNoValidate.options) == false){
int offsetParticles = 0;
FReal*const allPhysicalValues = allParticles.getPhysicalValues();
FReal*const allPosX = const_cast<FReal*>( allParticles.getPositions()[0]);
FReal*const allPosY = const_cast<FReal*>( allParticles.getPositions()[1]);
FReal*const allPosZ = const_cast<FReal*>( allParticles.getPositions()[2]);
groupedTree.forEachCellLeaf<FP2PGroupParticleContainer<> >([&](GroupCellClass cellTarget, FP2PGroupParticleContainer<> * leafTarget){
const FReal*const physicalValues = leafTarget->getPhysicalValues();
const FReal*const posX = leafTarget->getPositions()[0];
const FReal*const posY = leafTarget->getPositions()[1];
const FReal*const posZ = leafTarget->getPositions()[2];
const int nbPartsInLeafTarget = leafTarget->getNbParticles();
for(int idxPart = 0 ; idxPart < nbPartsInLeafTarget ; ++idxPart){
allPhysicalValues[offsetParticles + idxPart] = physicalValues[idxPart];
allPosX[offsetParticles + idxPart] = posX[idxPart];
allPosY[offsetParticles + idxPart] = posY[idxPart];
allPosZ[offsetParticles + idxPart] = posZ[idxPart];
}
offsetParticles += nbPartsInLeafTarget;
});
FAssertLF(offsetParticles == loader.getNumberOfParticles());
FReal*const allDirectPotentials = allParticles.getPotentials();
FReal*const allDirectforcesX = allParticles.getForcesX();
FReal*const allDirectforcesY = allParticles.getForcesY();
FReal*const allDirectforcesZ = allParticles.getForcesZ();
for(int idxTgt = 0 ; idxTgt < offsetParticles ; ++idxTgt){
for(int idxMutual = idxTgt + 1 ; idxMutual < offsetParticles ; ++idxMutual){
FP2PR::MutualParticles(
allPosX[idxTgt],allPosY[idxTgt],allPosZ[idxTgt], allPhysicalValues[idxTgt],
&allDirectforcesX[idxTgt], &allDirectforcesY[idxTgt], &allDirectforcesZ[idxTgt], &allDirectPotentials[idxTgt],
allPosX[idxMutual],allPosY[idxMutual],allPosZ[idxMutual], allPhysicalValues[idxMutual],
&allDirectforcesX[idxMutual], &allDirectforcesY[idxMutual], &allDirectforcesZ[idxMutual], &allDirectPotentials[idxMutual]
);
}
}
FMath::FAccurater potentialDiff;
FMath::FAccurater fx, fy, fz;
offsetParticles = 0;
groupedTree.forEachCellLeaf<FP2PGroupParticleContainer<> >([&](GroupCellClass cellTarget, FP2PGroupParticleContainer<> * leafTarget){
const FReal*const potentials = leafTarget->getPotentials();
const FReal*const forcesX = leafTarget->getForcesX();
const FReal*const forcesY = leafTarget->getForcesY();
const FReal*const forcesZ = leafTarget->getForcesZ();
const int nbPartsInLeafTarget = leafTarget->getNbParticles();
for(int idxTgt = 0 ; idxTgt < nbPartsInLeafTarget ; ++idxTgt){
potentialDiff.add(allDirectPotentials[idxTgt + offsetParticles], potentials[idxTgt]);
fx.add(allDirectforcesX[idxTgt + offsetParticles], forcesX[idxTgt]);
fy.add(allDirectforcesY[idxTgt + offsetParticles], forcesY[idxTgt]);
fz.add(allDirectforcesZ[idxTgt + offsetParticles], forcesZ[idxTgt]);
}
offsetParticles += nbPartsInLeafTarget;
});
std::cout << "Error : Potential " << potentialDiff << "\n";
std::cout << "Error : fx " << fx << "\n";
std::cout << "Error : fy " << fy << "\n";
std::cout << "Error : fz " << fz << "\n";
}
return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment