diff --git a/Addons/CKernelApi/Src/CScalfmmApi.h b/Addons/CKernelApi/Src/CScalfmmApi.h index 2f4d454ca4650c4cb0080b95bed963f7843491bd..cdc559cf91a17627daecf734ff75b0c446fa9437 100644 --- a/Addons/CKernelApi/Src/CScalfmmApi.h +++ b/Addons/CKernelApi/Src/CScalfmmApi.h @@ -90,7 +90,8 @@ typedef enum kernel_type { typedef enum scalfmm_algorithm_config { sequential = 0, /* Use the sequential version of Scalfmm*/ multi_thread = 1, /* Use the Multi thread version of Scalfmm*/ - periodic = 2 /* Use the periodic version of Scalfmm*/ + periodic = 2, /* Use the periodic version of Scalfmm*/ + source_target = 3 /* Use the source/target algorithm */ } scalfmm_algorithm; @@ -110,7 +111,7 @@ typedef void* scalfmm_handle; * Every data will be stored in order to crete later through a builder * what is needed for the simulation */ -scalfmm_handle scalfmm_init( scalfmm_kernel_type KernelType); +scalfmm_handle scalfmm_init( scalfmm_kernel_type KernelType,scalfmm_algorithm algo); ///////////////////////////////////////////////////////////////////// @@ -128,8 +129,9 @@ scalfmm_handle scalfmm_init( scalfmm_kernel_type KernelType); * @param tree_position int[3] position inside the tree (number of boxes in * each direction) * @param spatial_position double[3] center of the cell + * @param inDatas user generic pointer to kernel.
*/ -typedef void* (*Callback_init_cell)(int level, long long morton_index, int* tree_position, double* spatial_position); +typedef void* (*Callback_init_cell)(int level, long long morton_index, int* tree_position, double* spatial_position, void * inDatas); /** * Function to destroy what have bee initialized by the user (should @@ -160,33 +162,67 @@ typedef struct User_Scalfmm_Cell_Descriptor{ void scalfmm_build_tree(scalfmm_handle handle,int TreeHeight,double BoxWidth,double* BoxCenter,Scalfmm_Cell_Descriptor user_cell_descriptor); +/** + * @brief This enum flag tells whether the called function deals with + * sources, targets or both + */ +typedef enum particule_type{ + SOURCE=0, + TARGET=1, + BOTH=2 +}PartType; /** - * @brief This function insert an array of position into the octree + * @brief This function inserts, alongside the positions, an arbitrary + * number of attributes. + * @param Handle scalfmm_handle provided by scalfmm_init. + * @param NbPartToInsert number of particles to be inserted + * @param nbAttributeToInsert number of attributes to insert (this + * number must be >= 3, because we need at least 3 doubles for the + * position) + * @param strideForEachAtt index (in doubles) of each attribute inside one particle + * @param rawDatas data to be read + * + * Example : + struct part{ + double position[3]; + double charge; + double test; //not used + double coeff; + }; + Then nbAttributeToInsert will be 3+1+1 + and strideForEachAtt will be : [0,1,2,3,5] + */ +void scalfmm_tree_abstract_insert(scalfmm_handle Handle, int NbPartToInsert, int nbAttributeToInsert, int * strideForEachAtt, + double* rawDatas); + + +/** + * @brief This function inserts an array of positions into the + * octree. The particles are inserted as SOURCE, TARGET or BOTH, + * according to type. * @param Handle scalfmm_handle provided by scalfmm_init.
* @param NbPositions Number of position to be inserted * @param arrayX Array containing the X coordinate for all the parts, size : NbPositions * @param arrayY Array containing the Y coordinate for all the parts, size : NbPositions * @param arrayZ Array containing the Z coordinate for all the parts, size : NbPositions - * + * @param type : type of the particles to insert (SOURCE, TARGET or BOTH). * The parts will be inserted with their indices inside the * array. Each index will be unique. * - * In case several calls are performed to scalfmm_tree_insert_arrays, - * first call with N particles, and then with M particles. - * The second call particles will have index from [N ; N+M]. * * Default physical values, potential and forces are set to 0. */ -void scalfmm_tree_insert_particles(scalfmm_handle Handle, int NbPositions, double * arrayX, double * arrayY, double * arrayZ); +void scalfmm_tree_insert_particles(scalfmm_handle Handle, int NbPositions, double * arrayX, double * arrayY, double * arrayZ, PartType type); /** * This function is equivalent to scalfmm_tree_insert_particles * but the given array XYZ should contains a triple value per paticles. */ -void scalfmm_tree_insert_particles_xyz(scalfmm_handle Handle, int NbPositions, double * XYZ); +void scalfmm_tree_insert_particles_xyz(scalfmm_handle Handle, int NbPositions, double * XYZ, PartType type); + /** * @brief This function set the physical values of all the particles @@ -196,17 +232,17 @@ void scalfmm_tree_insert_particles_xyz(scalfmm_handle Handle, int NbPositions, d * inserted. * @param physicalValues Array containing the physical values to be * associated to each parts. - * + * @param type : type of the particles to be set (SOURCE, TARGET or BOTH). * The physical values will be stored according to their indices in * the array. First particle inserted will take value physicalValues[0].
*/ -void scalfmm_set_physical_values(scalfmm_handle Handle, int nbPhysicalValues, double * physicalValues); +void scalfmm_set_physical_values(scalfmm_handle Handle, int nbPhysicalValues, double * physicalValues, PartType type); /** * @brief get the physical values. * * WARNING : the user must allocate (and initialize) the array given */ -void scalfmm_get_physical_values(scalfmm_handle Handle, int nbPhysicalValues, double * physicalValues); +void scalfmm_get_physical_values(scalfmm_handle Handle, int nbPhysicalValues, double * physicalValues, PartType type); /** * @@ -215,6 +251,7 @@ void scalfmm_get_physical_values(scalfmm_handle Handle, int nbPhysicalValues, do * @param idxOfParticles an array of indexes of size nbPhysicalValues to know which particles * to set the values to. * @param physicalValues the physical values. + * @param type : type of the particles to be set (SOURCE, TARGET or BOTH). * * For example to set the physical values to particles 0 and 1 to values 1.1 and 1.4: * @code nbPhysicalValues = 2; @@ -224,9 +261,9 @@ void scalfmm_get_physical_values(scalfmm_handle Handle, int nbPhysicalValues, do * Be aware that such approach requiere to find particles in the tree which can have high cost. */ void scalfmm_set_physical_values_npart(scalfmm_handle Handle, int nbPhysicalValues, - int* idxOfParticles, double * physicalValues); + int* idxOfParticles, double * physicalValues, PartType type); void scalfmm_get_physical_values_npart(scalfmm_handle Handle, int nbPhysicalValues, - int* idxOfParticles, double * physicalValues); + int* idxOfParticles, double * physicalValues, PartType type); /** @@ -236,14 +273,17 @@ void scalfmm_get_physical_values_npart(scalfmm_handle Handle, int nbPhysicalValu * to the number of parts inserted) * @param forcesToFill array of size nbParts*3, that will contains the * forces. WARNING : User must allocate the array before call. - * + * @param type : type of the particles to be read (SOURCE, TARGET or BOTH).
* Forces will be stored sequentially, according to the indices in the * array. (ie fx1,fy1,fz1,fx2,fy2,fz2,fx3 ....) + * @param idxOfParticles : array of the indices of the wanted + * particles (_npart variants only). The parts will then be given in + * the order of idxOfParticles. */ -void scalfmm_get_forces_xyz(scalfmm_handle Handle, int nbParts, double * forcesToFill); -void scalfmm_get_forces_xyz_npart(scalfmm_handle Handle, int nbParts, int* idxOfParticles, double * forcesToFill); -void scalfmm_get_forces(scalfmm_handle Handle, int nbParts, double * fX, double* fY, double* fZ); -void scalfmm_get_forces_npart(scalfmm_handle Handle, int nbParts, int* idxOfParticles, double * fX, double* fY, double* fZ); +void scalfmm_get_forces_xyz(scalfmm_handle Handle, int nbParts, double * forcesToFill, PartType type); +void scalfmm_get_forces_xyz_npart(scalfmm_handle Handle, int nbParts, int* idxOfParticles, double * forcesToFill, PartType type); +void scalfmm_get_forces(scalfmm_handle Handle, int nbParts, double * fX, double* fY, double* fZ, PartType type); +void scalfmm_get_forces_npart(scalfmm_handle Handle, int nbParts, int* idxOfParticles, double * fX, double* fY, double* fZ, PartType type); /** @@ -257,12 +297,12 @@ void scalfmm_get_forces_npart(scalfmm_handle Handle, int nbParts, int* idxOfPart * forces . WARNING : User must allocate the array before call. * @param forcesZ array of size nbParts, that will contains the * forces . WARNING : User must allocate the array before call. - * + * @param type : type of the particles to be set (SOURCE, TARGET or BOTH).
*/ -void scalfmm_set_forces_xyz(scalfmm_handle Handle, int nbParts, double * forcesToFill); -void scalfmm_set_forces_xyz_npart(scalfmm_handle Handle, int nbParts, int* idxOfParticles, double * forcesToFill); -void scalfmm_set_forces(scalfmm_handle Handle, int nbParts, double * fX, double* fY, double* fZ); -void scalfmm_set_forces_npart(scalfmm_handle Handle, int nbParts, int* idxOfParticles, double * fX, double* fY, double* fZ); +void scalfmm_set_forces_xyz(scalfmm_handle Handle, int nbParts, double * forcesToFill, PartType type); +void scalfmm_set_forces_xyz_npart(scalfmm_handle Handle, int nbParts, int* idxOfParticles, double * forcesToFill, PartType type); +void scalfmm_set_forces(scalfmm_handle Handle, int nbParts, double * fX, double* fY, double* fZ, PartType type); +void scalfmm_set_forces_npart(scalfmm_handle Handle, int nbParts, int* idxOfParticles, double * fX, double* fY, double* fZ, PartType type); /** @@ -272,14 +312,15 @@ void scalfmm_set_forces_npart(scalfmm_handle Handle, int nbParts, int* idxOfPart * to the number of parts inserted) * @param potentialsToFill array of potentials to be filled. WARNING : * User must allocate the array before call. + * @param type : type of the particles concerned (SOURCE, TARGET or BOTH). * * Potentials will be stored sequentially, according to the indices in the * array.
*/ -void scalfmm_get_potentials(scalfmm_handle Handle, int nbParts, double * potentialsToFill); -void scalfmm_set_potentials(scalfmm_handle Handle, int nbParts, double * potentialsToRead); -void scalfmm_get_potentials_npart(scalfmm_handle Handle, int nbParts, int* idxOfParticles, double * potentialsToFill); -void scalfmm_set_potentials_npart(scalfmm_handle Handle, int nbParts, int* idxOfParticles, double * potentialsToRead); +void scalfmm_get_potentials(scalfmm_handle Handle, int nbParts, double * potentialsToFill, PartType type); +void scalfmm_set_potentials(scalfmm_handle Handle, int nbParts, double * potentialsToRead, PartType type); +void scalfmm_get_potentials_npart(scalfmm_handle Handle, int nbParts, int* idxOfParticles, double * potentialsToFill, PartType type); +void scalfmm_set_potentials_npart(scalfmm_handle Handle, int nbParts, int* idxOfParticles, double * potentialsToRead, PartType type); /** @@ -290,11 +331,12 @@ void scalfmm_set_potentials_npart(scalfmm_handle Handle, int nbParts, int* idxOf * number of parts inserted) * @param updatedXYZ array of displacement (ie * dx1,dy1,dz1,dx2,dy2,dz2,dx3 ...) + * @param type : type of the particles to be moved (SOURCE, TARGET or BOTH).
*/ -void scalfmm_add_to_positions_xyz(scalfmm_handle Handle, int NbPositions, double * updatedXYZ); -void scalfmm_add_to_positions(scalfmm_handle Handle, int NbPositions, double * X, double * Y , double * Z); -void scalfmm_add_to_positions_xyz_npart(scalfmm_handle Handle, int NbPositions, int* idxOfParticles, double * updatedXYZ); -void scalfmm_add_to_positions_npart(scalfmm_handle Handle, int NbPositions, int* idxOfParticles, double * X, double * Y , double * Z); +void scalfmm_add_to_positions_xyz(scalfmm_handle Handle, int NbPositions, double * updatedXYZ, PartType type); +void scalfmm_add_to_positions(scalfmm_handle Handle, int NbPositions, double * X, double * Y , double * Z, PartType type); +void scalfmm_add_to_positions_xyz_npart(scalfmm_handle Handle, int NbPositions, int* idxOfParticles, double * updatedXYZ, PartType type); +void scalfmm_add_to_positions_npart(scalfmm_handle Handle, int NbPositions, int* idxOfParticles, double * X, double * Y , double * Z, PartType type); /** @@ -305,17 +347,17 @@ void scalfmm_add_to_positions_npart(scalfmm_handle Handle, int NbPositions, int* * number of parts inserted) * @param newXYZ array of new positions (ie * dx1,dy1,dz1,dx2,dy2,dz2,dx3 ...) - * + * @param type : type of the particles to be set (SOURCE, TARGET or BOTH). * @return Error code, a parts may move out of the simulation * box. ScalFMM cannot deals with that specific case. Error code : * 0. Success code 1. Could be an arg in order to be Fortran * compliant.
* */ -void scalfmm_set_positions_xyz(scalfmm_handle Handle, int NbPositions, double * updatedXYZ); -void scalfmm_set_positions(scalfmm_handle Handle, int NbPositions, double * X, double * Y , double * Z); -void scalfmm_set_positions_xyz_npart(scalfmm_handle Handle, int NbPositions, int* idxOfParticles, double * updatedXYZ); -void scalfmm_set_positions_npart(scalfmm_handle Handle, int NbPositions, int* idxOfParticles, double * X, double * Y , double * Z); +void scalfmm_set_positions_xyz(scalfmm_handle Handle, int NbPositions, double * updatedXYZ, PartType type); +void scalfmm_set_positions(scalfmm_handle Handle, int NbPositions, double * X, double * Y , double * Z, PartType type); +void scalfmm_set_positions_xyz_npart(scalfmm_handle Handle, int NbPositions, int* idxOfParticles, double * updatedXYZ, PartType type); +void scalfmm_set_positions_npart(scalfmm_handle Handle, int NbPositions, int* idxOfParticles, double * X, double * Y , double * Z, PartType type); /** *@brief This is function is to be called after a call modifying some @@ -324,10 +366,10 @@ void scalfmm_set_positions_npart(scalfmm_handle Handle, int NbPositions, int* id */ void scalfmm_update_tree(scalfmm_handle handle); -void scalfmm_get_positions_xyz(scalfmm_handle Handle, int NbPositions, double * positionsToFill); -void scalfmm_get_positions(scalfmm_handle Handle, int NbPositions, double * X, double * Y , double * Z); -void scalfmm_get_positions_xyz_npart(scalfmm_handle Handle, int NbPositions, int* idxOfParticles, double * positionsToFill); -void scalfmm_get_positions_npart(scalfmm_handle Handle, int NbPositions, int* idxOfParticles, double * X, double * Y , double * Z); +void scalfmm_get_positions_xyz(scalfmm_handle Handle, int NbPositions, double * positionsToFill, PartType type); +void scalfmm_get_positions(scalfmm_handle Handle, int NbPositions, double * X, double * Y , double * Z, PartType type); +void scalfmm_get_positions_xyz_npart(scalfmm_handle Handle, int NbPositions, int* idxOfParticles, 
double * positionsToFill, PartType type); +void scalfmm_get_positions_npart(scalfmm_handle Handle, int NbPositions, int* idxOfParticles, double * X, double * Y , double * Z, PartType type); /* /\** */ /* * @brief This function provides a way for the user to define scalfmm */ @@ -446,7 +488,6 @@ typedef void (*Callback_P2PFull)(FSize nbParticles, const FSize* particleIndexes */ typedef void (*Callback_P2PInner)(FSize nbParticles, const FSize* particleIndexes, void* userData); - /** * @brief Function to be filled by user's method to reset a user's cell * @param level level of the cell. @@ -458,7 +499,6 @@ typedef void (*Callback_P2PInner)(FSize nbParticles, const FSize* particleIndexe */ typedef void (*Callback_reset_cell)(int level, long long morton_index, int* tree_position, double* spatial_position, void * userCell); - /** * @brief Structure containing callbacks to fill in order to define * user kernel. @@ -535,13 +575,30 @@ void scalfmm_dealloc_handle(scalfmm_handle handle, Callback_free_cell cellDestro */ void scalfmm_reset_tree(scalfmm_handle handle, Callback_reset_cell cellReseter); +///////////////////////////////////////////////////////////////////// +/////////////// Monitoring functions ///////////////// +///////////////////////////////////////////////////////////////////// + /** - * @brief This function shouldn't be there !! display information - * about the octree built versus the octree hibox want. + * @brief Scalfmm has a built in feature to get the time elapsed in + * each operator. This function returns the number of different timers. * @param Handle scalfmm_handle provided by scalfmm_init. - * @param Rinflu influence radius for each particle previously - * inserted. 
Tree must be built before calling this function + * @return Number of timers */ -void scalfmm_hibox_Rinflu_display(scalfmm_handle Handle, FSize nbPart, double * Rinflu); +int scalfmm_get_nb_timers(scalfmm_handle handle); + +/** + * @brief Scalfmm has a built in feature to get the time elapsed in + * each operator. This function fills the array with elapsed time for + * each operator. + * @param handle scalfmm_handle provided by scalfmm_init. + * @param Timers array of timers, to be allocated by the user (sized using + * scalfmm_get_nb_timers) + * Order inside the array : P2M, M2M, M2L, L2L, L2P, P2P, NearField + * (P2P+L2P). + */ +void scalfmm_get_timers(scalfmm_handle handle,double * Timers); + + #endif diff --git a/Addons/CKernelApi/Src/FInterEngine.hpp b/Addons/CKernelApi/Src/FInterEngine.hpp index e44ce640002f4f87270a17703bec96734e2c23e7..b2c75cee01800cd1232655bb6598d6211ac49d20 100644 --- a/Addons/CKernelApi/Src/FInterEngine.hpp +++ b/Addons/CKernelApi/Src/FInterEngine.hpp @@ -23,43 +23,45 @@ #define FINTERENGINE_HPP #include "FScalFMMEngine.hpp" + #include "Kernels/Interpolation/FInterpMatrixKernel.hpp" -//#include "Kernels/P2P/FP2PLeafInterface.hpp" +#include "Components/FTypedLeaf.hpp" + #include "Arranger/FOctreeArranger.hpp" #include "Arranger/FArrangerPeriodic.hpp" #include "Arranger/FBasicParticleContainerIndexedMover.hpp" +#include "Arranger/FParticleTypedIndexedMover.hpp" #include "Core/FFmmAlgorithmThread.hpp" #include "Core/FFmmAlgorithm.hpp" #include "Core/FFmmAlgorithmPeriodic.hpp" +#include "Core/FFmmAlgorithmThreadTsm.hpp" /** * @class FInterEngine implements API for Interpolations kernels, its * templates can be ChebCell/ChebKernel or UnifCell/UnifKernel */ -template<class FReal, class InterCell,class InterKernel, - class ContainerClass = FP2PParticleContainerIndexed<FReal>, - class LeafClass = FSimpleLeaf<FReal, FP2PParticleContainerIndexed<FReal> >, +template<class FReal, class InterCell,class InterKernel,class LeafClass, class MatrixKernelClass =
FInterpMatrixKernelR<FReal> > -class FInterEngine : public FScalFMMEngine{ +class FInterEngine : public FScalFMMEngine<FReal>{ private: - //Typedef on the octree class, in order to clarify following code - typedef FOctree<FReal,InterCell,ContainerClass,LeafClass> OctreeClass; - //typedef FP2PLeafInterface<OctreeClass> LeafInterface; + //Typedefs + typedef FP2PParticleContainerIndexed<FReal> ContainerClass; + typedef FTypedLeaf<FReal,ContainerClass> LeafClassTyped; + //Typedef on the Typed OctreeClass + typedef FOctree<FReal,InterCell,ContainerClass,LeafClass> OctreeClass; - //Typedef on Octree Arranger, in order to clarify following code - typedef FBasicParticleContainerIndexedMover<FReal,OctreeClass, ContainerClass> MoverClass; - typedef FOctreeArranger<FReal,OctreeClass, ContainerClass, MoverClass> ArrangerClass; - typedef FArrangerPeriodic<FReal,OctreeClass, ContainerClass, MoverClass> ArrangerClassPeriodic; //Pointer to the kernel to be executed InterKernel * kernel; MatrixKernelClass * matrix; //Link to the tree OctreeClass * octree; - ArrangerClass * arranger; + + // ArrangerClass * arranger; + public: /** @@ -71,11 +73,11 @@ public: * simulation box */ FInterEngine(scalfmm_kernel_type KernelType) : - kernel(nullptr), matrix(nullptr), octree(nullptr),arranger(nullptr){ - kernelType = KernelType; + kernel(nullptr), matrix(nullptr), octree(nullptr)/*,arranger(nullptr)*/{ + FScalFMMEngine<FReal>::kernelType = KernelType; } - void build_tree(int TreeHeight, double BoxWidth , double * BoxCenter,User_Scalfmm_Cell_Descriptor notUsedHere){ + void build_tree(int TreeHeight, FReal BoxWidth , FReal * BoxCenter,User_Scalfmm_Cell_Descriptor notUsedHere){ octree = new OctreeClass(TreeHeight,FMath::Min(3,TreeHeight-1),BoxWidth,FPoint<FReal>(BoxCenter)); this->matrix = new MatrixKernelClass(); this->kernel = new InterKernel(TreeHeight,BoxWidth,FPoint<FReal>(BoxCenter),matrix); @@ -85,198 +87,308 @@ public: //TODO free kernel too ~FInterEngine(){ delete matrix; - delete 
octree; - delete kernel; - if(arranger){ - delete arranger; + if(octree){ + delete octree; + } + if(kernel){ + delete kernel; } + // if(arranger){ + // delete arranger; + // } } //Inserting array of position - void tree_insert_particles_xyz(int NbPositions, double * XYZ){ - for(int idPart = 0; idPart<NbPositions ; ++idPart){ - octree->insert(FPoint<FReal>(&XYZ[3*idPart]),idPart); + //Need to be disabled if Source/Target is used + void tree_insert_particles_xyz(int NbPositions, FReal * XYZ, PartType type){ + if(type == BOTH){ + for(FSize idPart = 0; idPart<NbPositions ; ++idPart){ + octree->insert(FPoint<FReal>(&XYZ[3*idPart]),idPart); + } + FScalFMMEngine<FReal>::nbPart += NbPositions; + }else{ + if(type==SOURCE){ + for(FSize idPart = 0; idPart<NbPositions ; ++idPart){ + octree->insert(FPoint<FReal>(&XYZ[3*idPart]),FParticleTypeSource,idPart); + } + FScalFMMEngine<FReal>::nbPart += NbPositions; + }else{ + for(FSize idPart = 0; idPart<NbPositions ; ++idPart){ + octree->insert(FPoint<FReal>(&XYZ[3*idPart]),FParticleTypeTarget,idPart); + } + FScalFMMEngine<FReal>::nbPart += NbPositions; + } } - nbPart += NbPositions; } //Inserting arrayS of position - void tree_insert_particles(int NbPositions, double * X, double * Y, double * Z){ - for(int idPart = 0; idPart<NbPositions ; ++idPart){ - octree->insert(FPoint<FReal>(X[idPart],Y[idPart],Z[idPart]),idPart); + //Need to be disabled if Source/Target is used + void tree_insert_particles(int NbPositions, FReal * X, FReal * Y, FReal * Z, PartType type){ + if(type == BOTH){ + for(FSize idPart = 0; idPart<NbPositions ; ++idPart){ + octree->insert(FPoint<FReal>(X[idPart],Y[idPart],Z[idPart]),idPart); + } + FScalFMMEngine<FReal>::nbPart += NbPositions; + }else{ + if(type==SOURCE){ + for(FSize idPart = 0; idPart<NbPositions ; ++idPart){ + octree->insert(FPoint<FReal>(X[idPart],Y[idPart],Z[idPart]),FParticleTypeSource,idPart); + } + FScalFMMEngine<FReal>::nbPart += NbPositions; + }else{ + for(FSize idPart = 0; idPart<NbPositions ; 
++idPart){ + octree->insert(FPoint<FReal>(X[idPart],Y[idPart],Z[idPart]),FParticleTypeTarget,idPart); + } + FScalFMMEngine<FReal>::nbPart += NbPositions; + } } - nbPart += NbPositions; } + // void tree_abstract_insert(int NbPartToInsert, int nbAttributeToInsert, int * strideForEachAtt, + // FReal* rawDatas){ + // FAssertLF(nbAttributeToInsert > 2,"Need space to store positions, thus nbAttributeToInsert must be >= 3\nExiting ... \n"); + // FAssertLF(nbAttributeToInsert < 15,"Cannot instanciate more than 15 Attribute per Particules\n"); + // FRunIf::Run<int,3,15,1,RunContainer>(nbAttributeToInsert,); + // generic_tree_abstract_insert<ContainerClass,LeafClass,InterCell,nbAttributeToInsert>(octree, + // NbPartToInsert,strideForEachAtt,rawDatas); + // } + //Set the physical values - void set_physical_values(int nbPhysicalValues,double * physicalValues){ - octree->forEachLeaf([&](LeafClass* leaf){ - ContainerClass * sources = leaf->getSrc(); - const FVector<FSize>& indexes = leaf->getTargets()->getIndexes(); - FSize nbPartThere = sources->getNbParticles(); - for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ - sources->getPhysicalValues()[idxPart] = physicalValues[indexes[idxPart]]; - } - }); + void set_physical_values(int nbPhysicalValues,FReal * physicalValues, PartType type){ + int checkCount = 0; + if(type == SOURCE){ + octree->forEachLeaf([&] (LeafClass * leaf){ + ContainerClass * sources = leaf->getSrc(); + const FVector<FSize>& indexes = sources->getIndexes(); + FSize nbPartThere = sources->getNbParticles(); + for(int idx=0 ; idx<nbPartThere ; ++idx){ + sources->getPhysicalValues()[idx] = physicalValues[indexes[idx]]; + ++checkCount; + } + }); + } + else{ // type must be equal to TARGETS or BOTH + octree->forEachLeaf([&] (LeafClass * leaf){ + ContainerClass * targets = leaf->getTargets(); + const FVector<FSize>& indexes = targets->getIndexes(); + FSize nbPartThere = targets->getNbParticles(); + for(int idx=0 ; idx<nbPartThere ; ++idx){ + 
targets->getPhysicalValues()[idx] = physicalValues[indexes[idx]]; + ++checkCount; + } + }); + } + if(checkCount < nbPhysicalValues){std::cout << "Not all "<<nbPhysicalValues <<" parts has been set (only "<<checkCount<<")" << std::endl;} + else{ + if(checkCount > nbPhysicalValues){std::cout << "More parts than "<<nbPhysicalValues <<" has been set"<< std::endl;} + } } //Set only a subpart of physical values //Algorithm : loop over each leaf, and then search in user array //if any index matches - void set_physical_values_npart( int nbPhysicalValues, int* idxOfParticles, double * physicalValues){ - octree->forEachLeaf([&](LeafClass* leaf){ - ContainerClass * sources = leaf->getSrc(); - const FVector<FSize>& indexes = leaf->getTargets()->getIndexes(); - FSize nbPartThere = sources->getNbParticles(); - for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ - int iterPart = 0; - bool notFoundYet = true; - while(iterPart < nbPhysicalValues && notFoundYet){ - if(indexes[idxPart] == idxOfParticles[iterPart]){ - sources->getPhysicalValues()[idxPart] = physicalValues[indexes[idxPart]]; - notFoundYet = false; + void set_physical_values_npart( int nbPhysicalValues, int* idxOfParticles, FReal * physicalValues, PartType type){ + int checkCount = 0; + if(type == SOURCE){ + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * sources = leaf->getSrc(); + const FVector<FSize>& indexes = sources->getIndexes(); + FSize nbPartThere = sources->getNbParticles(); + for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + int iterPart = 0; + bool notFoundYet = true; + while(iterPart < nbPhysicalValues && notFoundYet){ + if(indexes[idxPart] == idxOfParticles[iterPart]){ + sources->getPhysicalValues()[idxPart] = physicalValues[iterPart]; + checkCount++; + notFoundYet = false; + } + else{ + ++iterPart; + } } - else{ - ++iterPart; + } + }); + }else{//Parts are target + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * targets = leaf->getTargets(); + const FVector<FSize>& 
indexes = targets->getIndexes(); + FSize nbPartTarget = targets->getNbParticles(); + //Targets part + for(FSize idxPart = 0 ; idxPart<nbPartTarget ; ++idxPart){ + int iterPart = 0; + bool notFoundYet = true; + while(iterPart < nbPhysicalValues && notFoundYet){ + if(indexes[idxPart] == idxOfParticles[iterPart]){ + targets->getPhysicalValues()[idxPart] = physicalValues[iterPart]; + notFoundYet = false; + checkCount++; + } + else{ + ++iterPart; + } } } - } - }); + }); + } + if(checkCount < nbPhysicalValues){std::cout << "Not all "<<nbPhysicalValues <<" parts has been set"<< std::endl;} + else{ + if(checkCount > nbPhysicalValues){std::cout << "More parts than "<<nbPhysicalValues <<" has been set"<< std::endl;} + } } + //get back the physical values - void get_physical_values( int nbPhysicalValues, double * physicalValues){ - octree->forEachLeaf([&](LeafClass* leaf){ - ContainerClass * sources = leaf->getSrc(); - const FVector<FSize>& indexes = leaf->getTargets()->getIndexes(); - FSize nbPartThere = sources->getNbParticles(); - for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ - physicalValues[indexes[idxPart]] = sources->getPhysicalValues()[idxPart]; - } - }); + void get_physical_values( int nbPhysicalValues, FReal * physicalValues, PartType type){ + int checkCount = 0; + if(type == SOURCE){ + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * sources = leaf->getSrc(); + const FVector<FSize>& indexes = sources->getIndexes(); + FSize nbPartThere = sources->getNbParticles(); + for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + physicalValues[indexes[idxPart]] = sources->getPhysicalValues()[idxPart]; + checkCount++; + } + }); + } + else{//Get the targets forces + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * targets = leaf->getTargets(); + const FVector<FSize>& indexes = targets->getIndexes(); + FSize nbPartThere = targets->getNbParticles(); + for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + 
physicalValues[indexes[idxPart]] = targets->getPhysicalValues()[idxPart]; + checkCount++; + } + }); + } + if(checkCount < nbPhysicalValues){std::cout << "Not all "<<nbPhysicalValues <<" parts has been read"<< std::endl;} + else{ + if(checkCount > nbPhysicalValues){std::cout << "More parts than "<<nbPhysicalValues <<" has been read"<< std::endl;} + } } + //Same algorithm as in set_physical_values_npart - void get_physical_values_npart( int nbPhysicalValues, int* idxOfParticles, double * physicalValues){ - octree->forEachLeaf([&](LeafClass* leaf){ - ContainerClass * sources = leaf->getSrc(); - const FVector<FSize>& indexes = leaf->getTargets()->getIndexes(); - FSize nbPartThere = sources->getNbParticles(); - for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ - int iterPart = 0; - bool notFoundYet = true; - while(iterPart < nbPhysicalValues && notFoundYet){ - if(indexes[idxPart] == idxOfParticles[iterPart]){ - physicalValues[indexes[idxPart]] = sources->getPhysicalValues()[idxPart]; - notFoundYet = false; + void get_physical_values_npart( int nbPhysicalValues, int* idxOfParticles, FReal * physicalValues, PartType type){ + int checkCount = 0; + if(type == SOURCE){ + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * sources = leaf->getSrc(); + const FVector<FSize>& indexes = sources->getIndexes(); + FSize nbPartThere = sources->getNbParticles(); + for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + int iterPart = 0; + bool notFoundYet = true; + while(iterPart < nbPhysicalValues && notFoundYet){ + if(indexes[idxPart] == idxOfParticles[iterPart]){ + physicalValues[iterPart] = sources->getPhysicalValues()[idxPart]; + notFoundYet = false; + checkCount++; + } + else{ + ++iterPart; + } } - else{ - ++iterPart; + } + }); + }else{ //Target + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * targets = leaf->getTargets(); + const FVector<FSize>& indexes = targets->getIndexes(); + FSize nbPartThere = targets->getNbParticles(); + for(FSize 
idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + int iterPart = 0; + bool notFoundYet = true; + while(iterPart < nbPhysicalValues && notFoundYet){ + if(indexes[idxPart] == idxOfParticles[iterPart]){ + physicalValues[iterPart] = targets->getPhysicalValues()[idxPart]; + notFoundYet = false; + checkCount++; + } + else{ + ++iterPart; + } } } - } - }); + }); + } + if(checkCount < nbPhysicalValues){std::cout << "Not all "<<nbPhysicalValues <<" parts has been read"<< std::endl;} + else{ + if(checkCount > nbPhysicalValues){std::cout << "More parts than "<<nbPhysicalValues <<" has been read"<< std::endl;} + } } - void get_forces_xyz( int nbParts, double * forcesToFill){ - octree->forEachLeaf([&](LeafClass* leaf){ - ContainerClass * sources = leaf->getSrc(); - const FVector<FSize>& indexes = leaf->getTargets()->getIndexes(); - FSize nbPartThere = sources->getNbParticles(); - for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ - forcesToFill[indexes[idxPart]*3+0] = sources->getForcesX()[idxPart]; - forcesToFill[indexes[idxPart]*3+1] = sources->getForcesY()[idxPart]; - forcesToFill[indexes[idxPart]*3+2] = sources->getForcesZ()[idxPart]; - } - }); + void get_forces_xyz( int nbParts, FReal * forcesToFill, PartType type){ + FScalFMMEngine<FReal>::template generic_get_forces_xyz<ContainerClass,LeafClass,InterCell>(octree,nbParts,forcesToFill,type); } - void get_forces_xyz_npart(int nbParts, int* idxOfParticles , double * forcesToFill){ - octree->forEachLeaf([&](LeafClass* leaf){ - ContainerClass * sources = leaf->getSrc(); - const FVector<FSize>& indexes = leaf->getTargets()->getIndexes(); - FSize nbPartThere = sources->getNbParticles(); - for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ - int iterPart = 0; - bool notFoundYet = true; - while(iterPart < nbParts && notFoundYet){ - if(indexes[idxPart] == idxOfParticles[iterPart]){ - forcesToFill[indexes[idxPart]*3+0] = sources->getForcesX()[idxPart]; - forcesToFill[indexes[idxPart]*3+1] = 
sources->getForcesY()[idxPart]; - forcesToFill[indexes[idxPart]*3+2] = sources->getForcesZ()[idxPart]; - notFoundYet = false; - } - else{ - ++iterPart; - } - } - } - }); + void get_forces(int nbParts, FReal * fX, FReal* fY, FReal* fZ, PartType type){ + FScalFMMEngine<FReal>::template generic_get_forces<ContainerClass,LeafClass,InterCell>(octree,nbParts,fX,fY,fZ,type); } - void get_forces( int nbParts, double * fX, double* fY, double* fZ){ - octree->forEachLeaf([&](LeafClass* leaf){ - ContainerClass * sources = leaf->getSrc(); - const FVector<FSize>& indexes = leaf->getTargets()->getIndexes(); - FSize nbPartThere = sources->getNbParticles(); - for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ - fX[indexes[idxPart]] = sources->getForcesX()[idxPart]; - fY[indexes[idxPart]] = sources->getForcesY()[idxPart]; - fZ[indexes[idxPart]] = sources->getForcesZ()[idxPart]; - } - }); + void get_forces_nbpart(int nbParts, int* idxOfParticles ,FReal * fX, FReal* fY, FReal* fZ, PartType type){ + FScalFMMEngine<FReal>::template generic_get_forces_xyz_npart<ContainerClass,LeafClass,InterCell>(octree,nbParts,idxOfParticles,fX,fY,fZ,type); } - void get_forces_npart(int nbParts, int* idxOfParticles ,double * fX, double* fY, double* fZ){ - octree->forEachLeaf([&](LeafClass* leaf){ - ContainerClass * sources = leaf->getSrc(); - const FVector<FSize>& indexes = leaf->getTargets()->getIndexes(); - FSize nbPartThere = sources->getNbParticles(); - for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ - int iterPart = 0; - bool notFoundYet = true; - while(iterPart < nbParts && notFoundYet){ - if(indexes[idxPart] == idxOfParticles[iterPart]){ - fX[indexes[idxPart]] = sources->getForcesX()[idxPart]; - fY[indexes[idxPart]] = sources->getForcesY()[idxPart]; - fZ[indexes[idxPart]] = sources->getForcesZ()[idxPart]; - notFoundYet = false; - } - else{ - ++iterPart; - } - } - } - }); + void get_forces_xyz_nbpart(int nbParts, int* idxOfParticles, FReal * forcesToFill, PartType type){ + 
FScalFMMEngine<FReal>::template generic_get_forces_xyz_npart<ContainerClass,LeafClass,InterCell>(octree,nbParts,idxOfParticles,forcesToFill,type); } + //To set initial condition - void set_forces_xyz( int nbParts, double * forcesToRead){ - octree->forEachLeaf([&](LeafClass* leaf){ + void set_forces_xyz( int nbParts, FReal * forcesToRead, PartType type){ + int checkCount = 0; + if(type == SOURCE || type==BOTH){ + octree->forEachLeaf([&](LeafClass* leaf){ ContainerClass * sources = leaf->getSrc(); - const FVector<FSize>& indexes = leaf->getTargets()->getIndexes(); + const FVector<FSize>& indexes = sources->getIndexes(); FSize nbPartThere = sources->getNbParticles(); for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ sources->getForcesX()[idxPart] = forcesToRead[indexes[idxPart]*3+0]; sources->getForcesY()[idxPart] = forcesToRead[indexes[idxPart]*3+1]; sources->getForcesZ()[idxPart] = forcesToRead[indexes[idxPart]*3+2]; + checkCount++; + } + }); + } + else{//Set force on target + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * targets = leaf->getTargets(); + const FVector<FSize>& indexes = targets->getIndexes(); + FSize nbPartThere = targets->getNbParticles(); + for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + targets->getForcesX()[idxPart] = forcesToRead[indexes[idxPart]*3+0]; + targets->getForcesY()[idxPart] = forcesToRead[indexes[idxPart]*3+1]; + targets->getForcesZ()[idxPart] = forcesToRead[indexes[idxPart]*3+2]; + checkCount++; } }); + } + if(checkCount < nbParts){std::cout << "Not all "<<nbParts <<" forces has been read"<< std::endl;} + else{ + if(checkCount > nbParts){std::cout << "More parts than "<<nbParts <<" forces has been read"<< std::endl;} + } } - void set_forces_xyz_npart( int nbParts, int* idxOfParticles, double * forcesToRead){ - octree->forEachLeaf([&](LeafClass* leaf){ + + void set_forces_xyz_npart( int nbParts, int* idxOfParticles, FReal * forcesToRead, PartType type){ + int checkCount = 0; + if(type == SOURCE || 
type==BOTH){ + octree->forEachLeaf([&](LeafClass* leaf){ ContainerClass * sources = leaf->getSrc(); - const FVector<FSize>& indexes = leaf->getTargets()->getIndexes(); + const FVector<FSize>& indexes = sources->getIndexes(); FSize nbPartThere = sources->getNbParticles(); for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ int iterPart = 0; bool notFoundYet = true; while(iterPart < nbParts && notFoundYet){ if(indexes[idxPart] == idxOfParticles[iterPart]){ - sources->getForcesX()[idxPart] = forcesToRead[indexes[idxPart]*3+0]; - sources->getForcesY()[idxPart] = forcesToRead[indexes[idxPart]*3+1]; - sources->getForcesZ()[idxPart] = forcesToRead[indexes[idxPart]*3+2]; + sources->getForcesX()[idxPart] = forcesToRead[iterPart]; + sources->getForcesY()[idxPart] = forcesToRead[iterPart]; + sources->getForcesZ()[idxPart] = forcesToRead[iterPart]; notFoundYet = false; + checkCount++; } else{ ++iterPart; @@ -284,33 +396,21 @@ public: } } }); - } - void set_forces( int nbParts, double * fX, double* fY, double* fZ){ - octree->forEachLeaf([&](LeafClass* leaf){ - ContainerClass * sources = leaf->getSrc(); - const FVector<FSize>& indexes = leaf->getTargets()->getIndexes(); - FSize nbPartThere = sources->getNbParticles(); - for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ - sources->getForcesX()[idxPart] = fX[indexes[idxPart]]; - sources->getForcesY()[idxPart] = fY[indexes[idxPart]]; - sources->getForcesZ()[idxPart] = fZ[indexes[idxPart]]; - } - }); - } - void set_forces_npart( int nbParts, int* idxOfParticles, double * fX, double* fY, double* fZ){ - octree->forEachLeaf([&](LeafClass* leaf){ - ContainerClass * sources = leaf->getSrc(); - const FVector<FSize>& indexes = leaf->getTargets()->getIndexes(); - FSize nbPartThere = sources->getNbParticles(); + }else{ //Targets + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * targets = leaf->getTargets(); + const FVector<FSize>& indexes = targets->getIndexes(); + FSize nbPartThere = targets->getNbParticles(); 
for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ int iterPart = 0; bool notFoundYet = true; while(iterPart < nbParts && notFoundYet){ if(indexes[idxPart] == idxOfParticles[iterPart]){ - sources->getForcesX()[idxPart] = fX[indexes[idxPart]]; - sources->getForcesY()[idxPart] = fY[indexes[idxPart]]; - sources->getForcesZ()[idxPart] = fZ[indexes[idxPart]]; + targets->getForcesX()[idxPart] = forcesToRead[iterPart]; + targets->getForcesY()[idxPart] = forcesToRead[iterPart]; + targets->getForcesZ()[idxPart] = forcesToRead[iterPart]; notFoundYet = false; + checkCount++; } else{ ++iterPart; @@ -318,242 +418,336 @@ public: } } }); + } + if(checkCount < nbParts){std::cout << "Not all "<<nbParts <<" forces has been read"<< std::endl;} + else{ + if(checkCount > nbParts){std::cout << "More parts than "<<nbParts <<" forces has been read"<< std::endl;} + } } - //Set the potentials - void set_potentials(int nbPotentials,double * potentialsToRead){ - octree->forEachLeaf([&](LeafClass* leaf){ - ContainerClass * sources = leaf->getSrc(); - const FVector<FSize>& indexes = leaf->getTargets()->getIndexes(); - FSize nbPartThere = sources->getNbParticles(); - for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ - sources->getPotentials()[idxPart] = potentialsToRead[indexes[idxPart]]; - } - }); + void set_forces( int nbParts, FReal * fX, FReal* fY, FReal* fZ, PartType type){ + int checkCount = 0; + if(type == SOURCE || type==BOTH){ + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * sources = leaf->getSrc(); + const FVector<FSize>& indexes = sources->getIndexes(); + FSize nbPartThere = sources->getNbParticles(); + for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + sources->getForcesX()[idxPart] = fX[indexes[idxPart]]; + sources->getForcesY()[idxPart] = fY[indexes[idxPart]]; + sources->getForcesZ()[idxPart] = fZ[indexes[idxPart]]; + checkCount++; + } + }); + }else{//Targets + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * targets = 
leaf->getTargets(); + const FVector<FSize>& indexes = targets->getIndexes(); + FSize nbPartThere = targets->getNbParticles(); + for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + targets->getForcesX()[idxPart] = fX[indexes[idxPart]]; + targets->getForcesY()[idxPart] = fY[indexes[idxPart]]; + targets->getForcesZ()[idxPart] = fZ[indexes[idxPart]]; + checkCount++; + } + }); + } + if(checkCount < nbParts){std::cout << "Not all "<<nbParts <<" forces has been read"<< std::endl;} + else{ + if(checkCount > nbParts){std::cout << "More parts than "<<nbParts <<" forces has been read"<< std::endl;} + } } - //Set only a subpart of potentials - //Algorithm : loop over each leaf, and then search in user array - //if any index matches - void set_potentials_npart( int nbPotentials, int* idxOfParticles, double * potentialsToRead){ - octree->forEachLeaf([&](LeafClass* leaf){ - ContainerClass * sources = leaf->getSrc(); - const FVector<FSize>& indexes = leaf->getTargets()->getIndexes(); - FSize nbPartThere = sources->getNbParticles(); - for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ - int iterPart = 0; - bool notFoundYet = true; - while(iterPart < nbPotentials && notFoundYet){ - if(indexes[idxPart] == idxOfParticles[iterPart]){ - sources->getPotentials()[idxPart] = potentialsToRead[indexes[idxPart]]; - notFoundYet = false; + void set_forces_npart( int nbParts, int* idxOfParticles, FReal * fX, FReal* fY, FReal* fZ, PartType type){ + int checkCount = 0; + if(type == SOURCE || type==BOTH){ + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * sources = leaf->getSrc(); + const FVector<FSize>& indexes = sources->getIndexes(); + FSize nbPartThere = sources->getNbParticles(); + for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + int iterPart = 0; + bool notFoundYet = true; + while(iterPart < nbParts && notFoundYet){ + if(indexes[idxPart] == idxOfParticles[iterPart]){ + sources->getForcesX()[idxPart] = fX[indexes[idxPart]]; + sources->getForcesY()[idxPart] 
= fY[indexes[idxPart]]; + sources->getForcesZ()[idxPart] = fZ[indexes[idxPart]]; + notFoundYet = false; + checkCount++; + } + else{ + ++iterPart; + } } - else{ - ++iterPart; + } + }); + }else{//Targets + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * targets = leaf->getTargets(); + const FVector<FSize>& indexes = targets->getIndexes(); + FSize nbPartThere = targets->getNbParticles(); + for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + int iterPart = 0; + bool notFoundYet = true; + while(iterPart < nbParts && notFoundYet){ + if(indexes[idxPart] == idxOfParticles[iterPart]){ + targets->getForcesX()[idxPart] = fX[indexes[idxPart]]; + targets->getForcesY()[idxPart] = fY[indexes[idxPart]]; + targets->getForcesZ()[idxPart] = fZ[indexes[idxPart]]; + notFoundYet = false; + checkCount++; + } + else{ + ++iterPart; + } } } - } - }); + }); + } + if(checkCount < nbParts){std::cout << "Not all "<<nbParts <<" forces has been read"<< std::endl;} + else{ + if(checkCount > nbParts){std::cout << "More parts than "<<nbParts <<" forces has been read"<< std::endl;} + } } - //get back the potentials - void get_potentials( int nbPotentials, double * potentialsToFill){ - octree->forEachLeaf([&](LeafClass* leaf){ - ContainerClass * sources = leaf->getTargets(); - const FVector<FSize>& indexes = leaf->getTargets()->getIndexes(); - FSize nbPartThere = sources->getNbParticles(); - for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ - potentialsToFill[indexes[idxPart]] = sources->getPotentials()[idxPart]; - } - }); + + + /** + * Position related methods + */ + void get_positions_xyz(int NbPositions, double * positionsToFill, PartType type){ + FScalFMMEngine<FReal>::template generic_get_positions_xyz<ContainerClass,LeafClass,InterCell>(octree,NbPositions,positionsToFill,type); + } + void get_positions_xyz_npart(int NbPositions, int * idxOfParticles, double * positionsToFill,PartType type){ + FScalFMMEngine<FReal>::template 
generic_get_positions_xyz_npart<ContainerClass,LeafClass,InterCell>(octree,NbPositions,idxOfParticles,positionsToFill,type); + } + void get_positions( int NbPositions, double *X, double *Y , double *Z, PartType type){ + FScalFMMEngine<FReal>::template generic_get_positions<ContainerClass,LeafClass,InterCell>(octree,NbPositions,X,Y,Z,type); + } + void get_positions_npart(int NbPositions, int * idxOfParticles,double * X, double * Y , double * Z,PartType type){ + FScalFMMEngine<FReal>::template generic_get_positions_npart<ContainerClass,LeafClass,InterCell>(octree,NbPositions,idxOfParticles,X,Y,Z,type); + } + void set_positions_xyz(int NbPositions, FReal * updatedXYZ, PartType type){ + FScalFMMEngine<FReal>::template generic_set_positions_xyz<ContainerClass,LeafClass,InterCell>(octree,NbPositions,updatedXYZ,type); + } + void set_positions(int NbPositions, FReal * X, FReal * Y, FReal * Z, PartType type){ + FScalFMMEngine<FReal>::template generic_set_positions<ContainerClass,LeafClass,InterCell>(octree,NbPositions,X,Y,Z,type); + } + void set_positions_xyz_npart(int NbPositions, int* idxOfParticles, FReal * updatedXYZ, PartType type){ + FScalFMMEngine<FReal>::template generic_set_positions_xyz_npart<ContainerClass,LeafClass,InterCell>(octree,NbPositions,idxOfParticles,updatedXYZ,type); + } + void set_positions_npart(int NbPositions, int* idxOfParticles, FReal * X, FReal * Y , FReal * Z, PartType type){ + FScalFMMEngine<FReal>::template generic_set_positions_npart<ContainerClass,LeafClass,InterCell>(octree,NbPositions,idxOfParticles,X,Y,Z,type); + } + void add_to_positions_xyz(int NbPositions,FReal * updatedXYZ,PartType type){ + FScalFMMEngine<FReal>::template generic_add_to_positions_xyz<ContainerClass,LeafClass,InterCell>(octree,NbPositions,updatedXYZ,type); + } + void add_to_positions(int NbPositions,FReal * X, FReal * Y , FReal * Z, PartType type){ + FScalFMMEngine<FReal>::template 
generic_add_to_positions<ContainerClass,LeafClass,InterCell>(octree,NbPositions,X,Y,Z,type); } - //Same algorithm as in set_potentials_npart - void get_potentials_npart( int nbPotentials, int* idxOfParticles, double * potentialsToFill){ - octree->forEachLeaf([&](LeafClass* leaf){ + //Set the potentials + void set_potentials(int nbPotentials,FReal * potentialsToRead, PartType type){ + int checkCount = 0; + if(type == SOURCE || type==BOTH){ + octree->forEachLeaf([&](LeafClass* leaf){ ContainerClass * sources = leaf->getSrc(); - const FVector<FSize>& indexes = leaf->getTargets()->getIndexes(); + const FVector<FSize>& indexes = sources->getIndexes(); FSize nbPartThere = sources->getNbParticles(); for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ - int iterPart = 0; - bool notFoundYet = true; - while(iterPart < nbPotentials && notFoundYet){ - if(indexes[idxPart] == idxOfParticles[iterPart]){ - potentialsToFill[indexes[idxPart]] = sources->getPotentials()[idxPart]; - notFoundYet = false; - } - else{ - ++iterPart; - } - } + sources->getPotentials()[idxPart] = potentialsToRead[indexes[idxPart]]; + checkCount++; } }); - } - - void get_positions_xyz(int NbPositions, double * positionsToFill){ - octree->forEachLeaf([&](LeafClass* leaf){ - ContainerClass * sources = leaf->getSrc(); - const FVector<FSize>& indexes = leaf->getTargets()->getIndexes(); - FSize nbPartThere = sources->getNbParticles(); + }else{//Targets + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * targets = leaf->getTargets(); + const FVector<FSize>& indexes = targets->getIndexes(); + FSize nbPartThere = targets->getNbParticles(); for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ - positionsToFill[indexes[idxPart]*3+0] = sources->getPositions()[0][idxPart]; - positionsToFill[indexes[idxPart]*3+1] = sources->getPositions()[1][idxPart]; - positionsToFill[indexes[idxPart]*3+2] = sources->getPositions()[2][idxPart]; + targets->getPotentials()[idxPart] = 
potentialsToRead[indexes[idxPart]]; + checkCount++; } }); + } + if(checkCount < nbPotentials){std::cout << "Not all "<<nbPotentials <<" forces has been read"<< std::endl;} + else{ + if(checkCount > nbPotentials){std::cout << "More parts than "<<nbPotentials <<" forces has been read"<< std::endl;} + } } - void get_positions_xyz_npart(int NbPositions, int * idxOfParticles, double * positionsToFill){ - octree->forEachLeaf([&](LeafClass* leaf){ - ContainerClass * sources = leaf->getSrc(); - const FVector<FSize>& indexes = leaf->getTargets()->getIndexes(); - FSize nbPartThere = sources->getNbParticles(); - for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ - int iterPart = 0; - bool notFoundYet = true; - while(iterPart < NbPositions && notFoundYet){ - if(indexes[idxPart] == idxOfParticles[iterPart]){ - positionsToFill[indexes[idxPart]*3+0] = sources->getPositions()[0][idxPart]; - positionsToFill[indexes[idxPart]*3+1] = sources->getPositions()[1][idxPart]; - positionsToFill[indexes[idxPart]*3+2] = sources->getPositions()[2][idxPart]; - notFoundYet = false; + //Set only a subpart of potentials + //Algorithm : loop over each leaf, and then search in user array + //if any index matches + void set_potentials_npart( int nbPotentials, int* idxOfParticles, FReal * potentialsToRead, PartType type){ + int checkCount = 0; + if(type == SOURCE || type==BOTH){ + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * sources = leaf->getSrc(); + const FVector<FSize>& indexes = sources->getIndexes(); + FSize nbPartThere = sources->getNbParticles(); + for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + int iterPart = 0; + bool notFoundYet = true; + while(iterPart < nbPotentials && notFoundYet){ + if(indexes[idxPart] == idxOfParticles[iterPart]){ + sources->getPotentials()[idxPart] = potentialsToRead[iterPart]; + notFoundYet = false; + checkCount++; + } + else{ + ++iterPart; + } } - else{ - ++iterPart; + } + }); + }else{//Targets + octree->forEachLeaf([&](LeafClass* 
leaf){ + ContainerClass * targets = leaf->getTargets(); + const FVector<FSize>& indexes = targets->getIndexes(); + FSize nbPartThere = targets->getNbParticles(); + for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + int iterPart = 0; + bool notFoundYet = true; + while(iterPart < nbPotentials && notFoundYet){ + if(indexes[idxPart] == idxOfParticles[iterPart]){ + targets->getPotentials()[idxPart] = potentialsToRead[iterPart]; + notFoundYet = false; + checkCount++; + } + else{ + ++iterPart; + } } } - } - }); + }); + } + if(checkCount < nbPotentials){std::cout << "Not all "<<nbPotentials <<" potentials has been read"<< std::endl;} + else{ + if(checkCount > nbPotentials){std::cout << "More parts than "<<nbPotentials <<" potentials has been read"<< std::endl;} + } } - void get_positions( int NbPositions, double * X, double * Y , double * Z){ - octree->forEachLeaf([&](LeafClass* leaf){ - ContainerClass * sources = leaf->getSrc(); - const FVector<FSize>& indexes = leaf->getTargets()->getIndexes(); - FSize nbPartThere = sources->getNbParticles(); - for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ - X[indexes[idxPart]] = sources->getPositions()[0][idxPart]; - Y[indexes[idxPart]] = sources->getPositions()[1][idxPart]; - Z[indexes[idxPart]] = sources->getPositions()[2][idxPart]; - } - }); + //get back the potentials + void get_potentials( int nbPotentials, FReal * potentialsToFill, PartType type){ + int checkCount = 0; + if(type == SOURCE){ + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * sources = leaf->getSrc(); + FSize nbPartThere = sources->getNbParticles(); + const FVector<FSize>& indexes = sources->getIndexes(); + for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + potentialsToFill[indexes[idxPart]] = sources->getPotentials()[idxPart]; + checkCount++; + } + }); + }else{//Targets + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * targets = leaf->getTargets(); + const FVector<FSize>& indexes = targets->getIndexes(); + 
FSize nbPartThere = targets->getNbParticles(); + for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + potentialsToFill[indexes[idxPart]] = targets->getPotentials()[idxPart]; + checkCount++; + } + }); + } + if(checkCount < nbPotentials){std::cout << "Not all "<<nbPotentials <<" potentials has been read"<< std::endl;} + else{ + if(checkCount > nbPotentials){std::cout << "More parts than "<<nbPotentials <<" potentials has been read"<< std::endl;} + } } - void get_positions_npart(int NbPositions, int * idxOfParticles,double * X, double * Y , double * Z){ - octree->forEachLeaf([&](LeafClass* leaf){ - ContainerClass * sources = leaf->getSrc(); - const FVector<FSize>& indexes = leaf->getTargets()->getIndexes(); - FSize nbPartThere = sources->getNbParticles(); - for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ - int iterPart = 0; - bool notFoundYet = true; - while(iterPart < NbPositions && notFoundYet){ - if(indexes[idxPart] == idxOfParticles[iterPart]){ - X[indexes[idxPart]] = sources->getPositions()[0][idxPart]; - Y[indexes[idxPart]] = sources->getPositions()[1][idxPart]; - Z[indexes[idxPart]] = sources->getPositions()[2][idxPart]; - notFoundYet = false; + //Same algorithm as in set_potentials_npart + void get_potentials_npart( int nbPotentials, int* idxOfParticles, FReal * potentialsToFill, PartType type){ + int checkCount = 0; + if(type == SOURCE){ + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * sources = leaf->getSrc(); + const FVector<FSize>& indexes = sources->getIndexes(); + FSize nbPartThere = sources->getNbParticles(); + for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + int iterPart = 0; + bool notFoundYet = true; + while(iterPart < nbPotentials && notFoundYet){ + if(indexes[idxPart] == idxOfParticles[iterPart]){ + potentialsToFill[indexes[idxPart]] = sources->getPotentials()[idxPart]; + notFoundYet = false; + checkCount++; + } + else{ + ++iterPart; + } } - else{ - ++iterPart; + } + }); + }else{ + 
octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * targets = leaf->getTargets(); + const FVector<FSize>& indexes = targets->getIndexes(); + FSize nbPartThere = targets->getNbParticles(); + for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + int iterPart = 0; + bool notFoundYet = true; + while(iterPart < nbPotentials && notFoundYet){ + if(indexes[idxPart] == idxOfParticles[iterPart]){ + potentialsToFill[indexes[idxPart]] = targets->getPotentials()[idxPart]; + notFoundYet = false; + checkCount++; + } + else{ + ++iterPart; + } } } - } - }); - } - - - //Arranger parts : following function provide a way to move parts - //inside the tree - void add_to_positions_xyz(int NbPositions,double * updatedXYZ){ - octree->forEachLeaf([&](LeafClass* leaf){ - ContainerClass * sources = leaf->getSrc(); - const FVector<FSize>& indexes = leaf->getTargets()->getIndexes(); - FSize nbPartThere = sources->getNbParticles(); - for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ - sources->getWPositions()[0][idxPart] += updatedXYZ[indexes[idxPart]*3+0]; - sources->getWPositions()[1][idxPart] += updatedXYZ[indexes[idxPart]*3+1]; - sources->getWPositions()[2][idxPart] += updatedXYZ[indexes[idxPart]*3+2]; - } - }); - } - - void add_to_positions(int NbPositions,double * X, double * Y , double * Z){ - octree->forEachLeaf([&](LeafClass* leaf){ - ContainerClass * sources = leaf->getSrc(); - const FVector<FSize>& indexes = leaf->getTargets()->getIndexes(); - FSize nbPartThere = sources->getNbParticles(); - for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ - sources->getWPositions()[0][idxPart] += X[indexes[idxPart]]; - sources->getWPositions()[1][idxPart] += Y[indexes[idxPart]]; - sources->getWPositions()[2][idxPart] += Z[indexes[idxPart]]; - } - }); - } - - - void set_positions_xyz(int NbPositions, double * updatedXYZ){ - octree->forEachLeaf([&](LeafClass* leaf){ - ContainerClass * sources = leaf->getSrc(); - const FVector<FSize>& indexes = 
leaf->getTargets()->getIndexes(); - FSize nbPartThere = sources->getNbParticles(); - for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ - sources->getWPositions()[0][idxPart] = updatedXYZ[indexes[idxPart]*3+0]; - sources->getWPositions()[1][idxPart] = updatedXYZ[indexes[idxPart]*3+1]; - sources->getWPositions()[2][idxPart] = updatedXYZ[indexes[idxPart]*3+2]; - } - }); - } - - void set_positions(int NbPositions, double * X, double * Y, double * Z){ - octree->forEachLeaf([&](LeafClass* leaf){ - ContainerClass * sources = leaf->getSrc(); - const FVector<FSize>& indexes = leaf->getTargets()->getIndexes(); - FSize nbPartThere = sources->getNbParticles(); - for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ - sources->getWPositions()[0][idxPart] = X[indexes[idxPart]]; - sources->getWPositions()[1][idxPart] = Y[indexes[idxPart]]; - sources->getWPositions()[2][idxPart] = Z[indexes[idxPart]]; - } - }); + }); + } + if(checkCount < nbPotentials){std::cout << "Not all "<<nbPotentials <<" potentials has been read"<< std::endl;} + else{ + if(checkCount > nbPotentials){std::cout << "More parts than "<<nbPotentials <<" potentials has been read"<< std::endl;} + } } //Simple call to FScalFMMEngine method with good template void reset_tree(Callback_reset_cell /*not used*/){ - generic_reset_tree<FReal,ContainerClass,InterCell,LeafClass>(octree); + FScalFMMEngine<FReal>::template generic_reset_tree<ContainerClass,InterCell,LeafClass>(octree); } - void update_tree(){ - if(arranger){ - arranger->rearrange(); - } - else{ - if(Algorithm == 2){ //case in wich the periodic algorithm is used - arranger = new ArrangerClassPeriodic(octree); - arranger->rearrange(); - } - else{ - arranger = new ArrangerClass(octree); - arranger->rearrange(); - } - } - } + + // void update_tree(){ + // if(arranger){ + // arranger->rearrange(); + // } + // else{ + // if(FScalFMMEngine<FReal>::Algorithm == 2){ //case in wich the periodic algorithm is used + // arranger = new 
ArrangerClassPeriodic(octree); + // arranger->rearrange(); + // } + // else{ + // arranger = new ArrangerClass(octree); + // arranger->rearrange(); + // } + // } + // } void execute_fmm(){ - switch(Algorithm){ + switch(FScalFMMEngine<FReal>::Algorithm){ case 0: { typedef FFmmAlgorithm<OctreeClass,InterCell,ContainerClass,InterKernel,LeafClass> AlgoClassSeq; - AlgoClassSeq algoSeq(octree,kernel); - algoSeq.execute(); + AlgoClassSeq* algoSeq = new AlgoClassSeq(octree,kernel); + algoSeq->execute(); + FScalFMMEngine<FReal>::algoTimer = algoSeq; break; } case 1: { typedef FFmmAlgorithmThread<OctreeClass,InterCell,ContainerClass,InterKernel,LeafClass> AlgoClassThread; - AlgoClassThread algoThread(octree,kernel); - algoThread.execute(); + AlgoClassThread* algoThread = new AlgoClassThread(octree,kernel); + algoThread->execute(); + FScalFMMEngine<FReal>::algoTimer = algoThread; break; } case 2: @@ -564,14 +758,50 @@ public: algoPeriod.execute(); break; } + case 3: + { + // typedef FFmmAlgorithmThreadTsm<OctreeClass,InterCell,ContainerClass,InterKernel,LeafClass> AlgoClassTargetSource; + // AlgoClassTargetSource* algoTS = new AlgoClassTargetSource(octree,kernel); + // algoTS->execute(); + // FScalFMMEngine<FReal>::algoTimer = algoTS; + // break; + } default : - std::cout<< "No algorithm found (probably for strange reasons) : "<< Algorithm <<" exiting" << std::endl; + std::cout<< "No algorithm found (probably for strange reasons) : "<< FScalFMMEngine<FReal>::Algorithm <<" exiting" << std::endl; } } void intern_dealloc_handle(Callback_free_cell unUsed){ //this->~FInterEngine(); } + + void print_everything(){ + octree->forEachLeaf([&](LeafClass * leaf){ + ContainerClass * sources = leaf->getSrc(); + ContainerClass * targets = leaf->getTargets(); + const FVector<FSize>& indexesSources = sources->getIndexes(); + const FVector<FSize>& indexesTargets = targets->getIndexes(); + FSize nbPartSource = sources->getNbParticles(); + FSize nbPartTarget = targets->getNbParticles(); + 
for(int i=0 ; i<nbPartSource ; ++i){ + printf("Src : Leaf : %p Part : %lld/%lld, pos: %e,%e,%e phy: %e, forces: %e,%e,%e pot %e\n", + leaf,indexesSources[i],nbPartSource, + sources->getPositions()[0][i],sources->getPositions()[1][i],sources->getPositions()[2][i], + sources->getPhysicalValues()[i], + sources->getForcesX()[i],sources->getForcesY()[i],sources->getForcesZ()[i], + sources->getPotentials()[i]); + } + for(int i=0 ; i<nbPartTarget ; ++i){ + printf("Tgt : Leaf : %p Part : %lld/%lld, pos %e,%e,%e phy: %e, forces: %e,%e,%e pot %e\n", + leaf,indexesTargets[i],nbPartTarget, + targets->getPositions()[0][i],targets->getPositions()[1][i],targets->getPositions()[2][i], + targets->getPhysicalValues()[i], + targets->getForcesX()[i],targets->getForcesY()[i],targets->getForcesZ()[i], + targets->getPotentials()[i]); + } + + }); + } }; diff --git a/Addons/CKernelApi/Src/FScalFMMEngine.hpp b/Addons/CKernelApi/Src/FScalFMMEngine.hpp index d9326ba056e5cffcd56dacd8b1d106af8bf2e005..363f1b66ebdbd1b34e40b401c322fc7d989ccb0b 100644 --- a/Addons/CKernelApi/Src/FScalFMMEngine.hpp +++ b/Addons/CKernelApi/Src/FScalFMMEngine.hpp @@ -29,8 +29,8 @@ //For tree #include "Components/FSimpleLeaf.hpp" + #include "Kernels/P2P/FP2PParticleContainerIndexed.hpp" -#include "Containers/FOctree.hpp" //For interpolation #include "Kernels/Interpolation/FInterpMatrixKernel.hpp" @@ -42,27 +42,37 @@ //For chebyshev Interpolation #include "Kernels/Chebyshev/FChebCell.hpp" #include "Kernels/Chebyshev/FChebSymKernel.hpp" - - +#include "Utils/FAlgorithmTimers.hpp" +#include "Components/FParticleType.hpp" +#include "Components/FTypedLeaf.hpp" +#include "Containers/FOctree.hpp" +#include "Utils/FTemplate.hpp" /** * @class FScalFMMEngine */ +template<class FReal> class FScalFMMEngine{ + protected: scalfmm_kernel_type kernelType; scalfmm_algorithm Algorithm; FVector<bool>* progress; int nbPart; + FAlgorithmTimers * algoTimer; public: - FScalFMMEngine() : Algorithm(multi_thread), progress(nullptr), nbPart(0){ 
+ + FScalFMMEngine() : Algorithm(multi_thread), progress(nullptr), nbPart(0), algoTimer(nullptr){ progress = new FVector<bool>(); } virtual ~FScalFMMEngine() { + if(algoTimer){ + delete algoTimer; + } delete progress; } @@ -72,7 +82,6 @@ public: return this->kernelType; } - //To change default algorithm void algorithm_config(scalfmm_algorithm config){ this->Algorithm = config; @@ -83,129 +92,683 @@ public: //by specific Engine //Function about the tree - virtual void build_tree(int TreeHeight,double BoxWidth,double* BoxCenter,Scalfmm_Cell_Descriptor user_cell_descriptor){ + virtual void build_tree(int TreeHeight,FReal BoxWidth,FReal* BoxCenter,Scalfmm_Cell_Descriptor user_cell_descriptor){ FAssertLF(0,"Nothing has been done yet, exiting"); } - virtual void tree_insert_particles( int NbPositions, double * arrayX, double * arrayY, double * arrayZ){ + virtual void tree_insert_particles( int NbPositions, FReal * arrayX, FReal * arrayY, FReal * arrayZ, PartType type){ FAssertLF(0,"No tree instancied, exiting ...\n"); } - virtual void tree_insert_particles_xyz( int NbPositions, double * XYZ){ + virtual void tree_insert_particles_xyz( int NbPositions, FReal * XYZ, PartType type){ FAssertLF(0,"No tree instancied, exiting ...\n"); } - virtual void set_physical_values( int nbPhysicalValues, double * physicalValues){ + virtual void set_physical_values( int nbPhysicalValues, FReal * physicalValues, PartType type){ FAssertLF(0,"No tree instancied, exiting ...\n"); } - virtual void get_physical_values( int nbPhysicalValues, double * physicalValues){ + virtual void get_physical_values( int nbPhysicalValues, FReal * physicalValues, PartType type){ FAssertLF(0,"No tree instancied, exiting ...\n"); } virtual void set_physical_values_npart( int nbPhysicalValues, - int* idxOfParticles, double * physicalValues){ + int* idxOfParticles, FReal * physicalValues, PartType type){ FAssertLF(0,"No tree instancied, exiting ...\n"); } virtual void get_physical_values_npart( int nbPhysicalValues, 
- int* idxOfParticles, double * physicalValues){ + int* idxOfParticles, FReal * physicalValues, PartType type){ FAssertLF(0,"No tree instancied, exiting ...\n"); } - //To get the result - virtual void get_forces_xyz( int nbParts, double * forcesToFill){ - FAssertLF(0,"No tree instancied, exiting ...\n"); + virtual void get_forces_xyz( int nbParts, FReal * forcesToFill, PartType type){ } - virtual void get_forces_xyz_npart( int nbParts, int* idxOfParticles, double * forcesToFill){ - FAssertLF(0,"No tree instancied, exiting ...\n"); + + virtual void get_forces(int nbParts, FReal * fX, FReal* fY, FReal* fZ, PartType type){ } - virtual void get_forces( int nbParts, double * fX, double* fY, double* fZ){ - FAssertLF(0,"No tree instancied, exiting ...\n"); + + virtual void get_forces_npart(int nbParts, int* idxOfParticles ,FReal * fX, FReal* fY, FReal* fZ, PartType type){ } - virtual void get_forces_npart( int nbParts, int* idxOfParticles, double * fX, double* fY, double* fZ){ - FAssertLF(0,"No tree instancied, exiting ...\n"); + + virtual void get_forces_xyz_npart(int nbParts, int* idxOfParticles, FReal * forcesToFill, PartType type){ + } + + virtual void add_to_positions_xyz(int NbPositions,FReal * updatedXYZ,PartType type){ + } + + virtual void add_to_positions(int NbPositions,FReal * X, FReal * Y , FReal * Z, PartType type){ } + virtual void tree_abstract_insert(int NbPartToInsert, int nbAttributeToInsert, int * strideForEachAtt, + FReal* rawDatas){ + } + + //To set initial condition - virtual void set_forces_xyz( int nbParts, double * forcesToFill){ + virtual void set_forces_xyz( int nbParts, FReal * forcesToFill, PartType type){ FAssertLF(0,"No tree instancied, exiting ...\n"); } - virtual void set_forces_xyz_npart( int nbParts, int* idxOfParticles, double * forcesToFill){ + virtual void set_forces_xyz_npart( int nbParts, int* idxOfParticles, FReal * forcesToFill, PartType type){ FAssertLF(0,"No tree instancied, exiting ...\n"); } - virtual void set_forces( int 
nbParts, double * fX, double* fY, double* fZ){ + virtual void set_forces( int nbParts, FReal * fX, FReal* fY, FReal* fZ, PartType type){ FAssertLF(0,"No tree instancied, exiting ...\n"); } - virtual void set_forces_npart( int nbParts, int* idxOfParticles, double * fX, double* fY, double* fZ){ + virtual void set_forces_npart( int nbParts, int* idxOfParticles, FReal * fX, FReal* fY, FReal* fZ, PartType type){ FAssertLF(0,"No tree instancied, exiting ...\n"); } //To deal with potential - virtual void get_potentials( int nbParts, double * potentialsToFill){ + virtual void get_potentials( int nbParts, FReal * potentialsToFill, PartType type){ FAssertLF(0,"No tree instancied, exiting ...\n"); } - virtual void set_potentials( int nbParts, double * potentialsToRead){ + virtual void set_potentials( int nbParts, FReal * potentialsToRead, PartType type){ FAssertLF(0,"No tree instancied, exiting ...\n"); } - virtual void get_potentials_npart( int nbParts, int* idxOfParticles, double * potentialsToFill){ + virtual void get_potentials_npart( int nbParts, int* idxOfParticles, FReal * potentialsToFill, PartType type){ FAssertLF(0,"No tree instancied, exiting ...\n"); } - virtual void set_potentials_npart( int nbParts, int* idxOfParticles, double * potentialsToRead){ + virtual void set_potentials_npart( int nbParts, int* idxOfParticles, FReal * potentialsToRead, PartType type){ FAssertLF(0,"No tree instancied, exiting ...\n"); } - //Function to move particles - virtual void add_to_positions_xyz( int NbPositions, double * updatedXYZ){ - FAssertLF(0,"No tree instancied, exiting ...\n"); + + + /** Test ... 
*/ + struct RunContainer{ + template< int nbAttributeToInsert,class ContainerClass,class LeafClass, class CellClass> + static void Run(FOctree<FReal,CellClass,ContainerClass,LeafClass> * octree, + int NbPartToInsert,int * strideForEachAtt, + FReal* rawDatas){ + generic_tree_abstract_insert<ContainerClass,LeafClass,CellClass,nbAttributeToInsert>(octree, + NbPartToInsert,strideForEachAtt,rawDatas); + } + }; + + template<class ContainerClass,class LeafClass, class CellClass, int nbAttributeToInsert> + void generic_tree_abstract_insert(FOctree<FReal,CellClass,ContainerClass,LeafClass> * octree, + int NbPartToInsert,int * strideForEachAtt, + FReal* rawDatas){ + for(FSize idxPart = 0; idxPart<NbPartToInsert ; ++idxPart){ + FPoint<FReal> pos = FPoint<FReal>(rawDatas[0],rawDatas[1],rawDatas[2]); + MortonIndex index = octree->getMortonFromPosition(pos); + //Insert with how many attributes ??? + octree->insert(pos,idxPart); + //Get again the container + ContainerClass * containerToFill = octree->getLeafSrc(index);//cannot be nullptr + std::array<FReal,nbAttributeToInsert> arrayOfAttribute; + for(int idxAtt = 0; idxAtt<nbAttributeToInsert ; ++idxAtt){ + arrayOfAttribute[idxAtt] = rawDatas[3+ strideForEachAtt[idxAtt]]; + } + int idxToRemove = containerToFill->getNbParticles(); + containerToFill->remove(&idxToRemove,1); + containerToFill->push(pos,idxPart,arrayOfAttribute); + } } - virtual void add_to_positions( int NbPositions, double * X, double * Y , double * Z){ - FAssertLF(0,"No tree instancied, exiting ...\n"); + + template<class ContainerClass,class LeafClass,class CellClass> + void generic_get_forces_xyz(FOctree<FReal,CellClass,ContainerClass,LeafClass> * octree, + int nbParts, FReal * forcesToFill, PartType type){ + int checkCount = 0; + if(type == SOURCE){ + std::cout << "No meaning to retrieve source forces ... 
" << std::endl; + } + else{ //Targets OR Both + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * targets = leaf->getTargets(); + const FVector<FSize>& indexes = targets->getIndexes(); + FSize nbPartThere = targets->getNbParticles(); + for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + forcesToFill[indexes[idxPart]*3+0] = targets->getForcesX()[idxPart]; + forcesToFill[indexes[idxPart]*3+1] = targets->getForcesY()[idxPart]; + forcesToFill[indexes[idxPart]*3+2] = targets->getForcesZ()[idxPart]; + checkCount++; + } + }); + } + if(checkCount < nbParts){std::cout << "Not all "<<nbParts <<" parts has been read (only "<<checkCount<<")"<< std::endl;} + else{ + if(checkCount > nbParts){std::cout << "More parts than "<<nbParts <<" has been read"<< std::endl;} + } } - virtual void add_to_positions_xyz_npart( int NbPositions, int* idxOfParticles, double * updatedXYZ){ - FAssertLF(0,"No tree instancied, exiting ...\n"); + + template<class ContainerClass,class LeafClass,class CellClass> + void generic_get_forces_xyz_npart(FOctree<FReal,CellClass,ContainerClass,LeafClass> * octree, + int nbParts, int* idxOfParticles , FReal * forcesToFill, PartType type){ + int checkCount = 0; + if(type == SOURCE){ + std::cout << "No meaning to retrieve source forces ... 
" << std::endl; + } + else{ //Targets OR Both + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * targets = leaf->getTargets(); + const FVector<FSize>& indexes = targets->getIndexes(); + FSize nbPartThere = targets->getNbParticles(); + for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + int iterPart = 0; + bool notFoundYet = true; + while(iterPart < nbParts && notFoundYet){ + if(indexes[idxPart] == idxOfParticles[iterPart]){ + forcesToFill[iterPart*3+0] = targets->getForcesX()[idxPart]; + forcesToFill[iterPart*3+1] = targets->getForcesY()[idxPart]; + forcesToFill[iterPart*3+2] = targets->getForcesZ()[idxPart]; + notFoundYet = false; + checkCount++; + } + else{ + ++iterPart; + } + } + } + }); + } + if(checkCount < nbParts){std::cout << "Not all "<<nbParts <<" parts has been read"<< std::endl;} + else{ + if(checkCount > nbParts){std::cout << "More parts than "<<nbParts <<" has been read"<< std::endl;} + } } - virtual void add_to_positions_npart( int NbPositions, int* idxOfParticles, - double * X, double * Y , double * Z){ - FAssertLF(0,"No tree instancied, exiting ...\n"); + + template<class ContainerClass,class LeafClass,class CellClass> + void generic_get_forces(FOctree<FReal,CellClass,ContainerClass,LeafClass> * octree, + int nbParts, FReal * fX, FReal* fY, FReal* fZ, PartType type){ + int checkCount = 0; + if(type == SOURCE){ + std::cout << "No meaning to retrieve source forces ... 
" << std::endl; + } + else{ //Targets OR Both + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * targets = leaf->getTargets(); + const FVector<FSize>& indexes = targets->getIndexes(); + FSize nbPartThere = targets->getNbParticles(); + for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + fX[indexes[idxPart]] = targets->getForcesX()[idxPart]; + fY[indexes[idxPart]] = targets->getForcesY()[idxPart]; + fZ[indexes[idxPart]] = targets->getForcesZ()[idxPart]; + checkCount++; + } + }); + } + if(checkCount < nbParts){std::cout << "Not all "<<nbParts <<" parts has been read"<< std::endl;} + else{ + if(checkCount > nbParts){std::cout << "More parts than "<<nbParts <<" has been read"<< std::endl;} + } + } + + template<class ContainerClass,class LeafClass,class CellClass> + void generic_get_forces_nbpart(FOctree<FReal,CellClass,ContainerClass,LeafClass> * octree, + int nbParts, int* idxOfParticles ,FReal * fX, FReal* fY, FReal* fZ, PartType type){ + int checkCount = 0; + if(type == SOURCE){ + std::cout << "No meaning to retrieve source forces ... 
" << std::endl; + } + else{ //Targets OR Both + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * targets = leaf->getTargets(); + const FVector<FSize>& indexes = targets->getIndexes(); + FSize nbPartThere = targets->getNbParticles(); + for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + int iterPart = 0; + bool notFoundYet = true; + while(iterPart < nbParts && notFoundYet){ + if(indexes[idxPart] == idxOfParticles[iterPart]){ + fX[iterPart] = targets->getForcesX()[idxPart]; + fY[iterPart] = targets->getForcesY()[idxPart]; + fZ[iterPart] = targets->getForcesZ()[idxPart]; + notFoundYet = false; + checkCount++; + } + else{ + ++iterPart; + } + } + } + }); + } + if(checkCount < nbParts){std::cout << "Not all "<<nbParts <<" parts has been read"<< std::endl;} + else{ + if(checkCount > nbParts){std::cout << "More parts than "<<nbParts <<" has been read"<< std::endl;} + } + } + + //Arranger parts : following function provide a way to move parts + //inside the tree + template<class ContainerClass,class LeafClass,class CellClass> + void generic_add_to_positions_xyz(FOctree<FReal,CellClass,ContainerClass,LeafClass> * octree, + int NbPositions,FReal * updatedXYZ, PartType type){ + int checkCount = 0; + if(type == SOURCE || type==BOTH){ + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * sources = leaf->getSrc(); + const FVector<FSize>& indexes = sources->getIndexes(); + FSize nbPartThere = sources->getNbParticles(); + for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + sources->getWPositions()[0][idxPart] += updatedXYZ[indexes[idxPart]*3+0]; + sources->getWPositions()[1][idxPart] += updatedXYZ[indexes[idxPart]*3+1]; + sources->getWPositions()[2][idxPart] += updatedXYZ[indexes[idxPart]*3+2]; + checkCount++; + } + }); + }else{//Targets + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * targets = leaf->getTargets(); + const FVector<FSize>& indexes = targets->getIndexes(); + FSize nbPartThere = targets->getNbParticles(); + for(FSize 
idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + targets->getWPositions()[0][idxPart] += updatedXYZ[indexes[idxPart]*3+0]; + targets->getWPositions()[1][idxPart] += updatedXYZ[indexes[idxPart]*3+1]; + targets->getWPositions()[2][idxPart] += updatedXYZ[indexes[idxPart]*3+2]; + checkCount++; + } + }); + } + if(checkCount < NbPositions){std::cout << "Not all "<<NbPositions <<" potentials has been read"<< std::endl;} + else{ + if(checkCount > NbPositions){std::cout << "More parts than "<<NbPositions <<" potentials has been read"<< std::endl;} + } + update_tree(); + } + + + template<class ContainerClass,class LeafClass,class CellClass> + void generic_add_to_positions(FOctree<FReal,CellClass,ContainerClass,LeafClass> * octree, + int NbPositions,FReal * X, FReal * Y , FReal * Z, PartType type){ + int checkCount = 0; + if(type == SOURCE || type==BOTH){ + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * sources = leaf->getSrc(); + const FVector<FSize>& indexes = sources->getIndexes(); + FSize nbPartThere = sources->getNbParticles(); + for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + sources->getWPositions()[0][idxPart] += X[indexes[idxPart]]; + sources->getWPositions()[1][idxPart] += Y[indexes[idxPart]]; + sources->getWPositions()[2][idxPart] += Z[indexes[idxPart]]; + checkCount++; + } + }); + }else{//Targets + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * targets = leaf->getTargets(); + const FVector<FSize>& indexes = targets->getIndexes(); + FSize nbPartThere = targets->getNbParticles(); + for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + targets->getWPositions()[0][idxPart] += X[indexes[idxPart]]; + targets->getWPositions()[1][idxPart] += Y[indexes[idxPart]]; + targets->getWPositions()[2][idxPart] += Z[indexes[idxPart]]; + checkCount++; + } + }); + } + if(checkCount < NbPositions){std::cout << "Not all "<<NbPositions <<" potentials has been read"<< std::endl;} + else{ + if(checkCount > NbPositions){std::cout << "More parts 
than "<<NbPositions <<" potentials has been read"<< std::endl;} + } + update_tree(); + } + + //Not yet done + void add_to_positions_xyz_npart( int NbPositions, int* idxOfParticles, FReal * updatedXYZ, PartType type){ + FAssertLF(0,"Not Yet done ...\n"); + } + void add_to_positions_npart( int NbPositions, int* idxOfParticles, + FReal * X, FReal * Y , FReal * Z, PartType type){ + FAssertLF(0,"Not Yet done ...\n"); } - virtual void set_positions_xyz( int NbPositions, double * updatedXYZ){ + + template<class ContainerClass,class LeafClass,class CellClass> + void generic_set_positions_xyz(FOctree<FReal,CellClass,ContainerClass,LeafClass> * octree, + int NbPositions, FReal * updatedXYZ, PartType type){ + int checkCount = 0; + if(type == SOURCE || type==BOTH){ + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * sources = leaf->getSrc(); + const FVector<FSize>& indexes = sources->getIndexes(); + FSize nbPartThere = sources->getNbParticles(); + for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + sources->getWPositions()[0][idxPart] = updatedXYZ[indexes[idxPart]*3+0]; + sources->getWPositions()[1][idxPart] = updatedXYZ[indexes[idxPart]*3+1]; + sources->getWPositions()[2][idxPart] = updatedXYZ[indexes[idxPart]*3+2]; + checkCount++; + } + }); + }else{//Targets + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * targets = leaf->getTargets(); + const FVector<FSize>& indexes = targets->getIndexes(); + FSize nbPartThere = targets->getNbParticles(); + for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + targets->getWPositions()[0][idxPart] = updatedXYZ[indexes[idxPart]*3+0]; + targets->getWPositions()[1][idxPart] = updatedXYZ[indexes[idxPart]*3+1]; + targets->getWPositions()[2][idxPart] = updatedXYZ[indexes[idxPart]*3+2]; + checkCount++; + } + }); + } + if(checkCount < NbPositions){std::cout << "Not all "<<NbPositions <<" potentials has been read"<< std::endl;} + else{ + if(checkCount > NbPositions){std::cout << "More parts than "<<NbPositions 
<<" potentials has been read"<< std::endl;} + } + update_tree(); + } + + template<class ContainerClass,class LeafClass,class CellClass> + void generic_set_positions(FOctree<FReal,CellClass,ContainerClass,LeafClass> * octree, + int NbPositions, FReal * X, FReal * Y, FReal * Z, PartType type){ + int checkCount = 0; + if(type == SOURCE || type==BOTH){ + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * sources = leaf->getSrc(); + const FVector<FSize>& indexes = sources->getIndexes(); + FSize nbPartThere = sources->getNbParticles(); + for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + sources->getWPositions()[0][idxPart] = X[indexes[idxPart]]; + sources->getWPositions()[1][idxPart] = Y[indexes[idxPart]]; + sources->getWPositions()[2][idxPart] = Z[indexes[idxPart]]; + checkCount++; + } + }); + }else{//Targets + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * targets = leaf->getTargets(); + const FVector<FSize>& indexes = targets->getIndexes(); + FSize nbPartThere = targets->getNbParticles(); + for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + targets->getWPositions()[0][idxPart] = X[indexes[idxPart]]; + targets->getWPositions()[1][idxPart] = Y[indexes[idxPart]]; + targets->getWPositions()[2][idxPart] = Z[indexes[idxPart]]; + checkCount++; + } + }); + } + if(checkCount < NbPositions){std::cout << "Not all "<<NbPositions <<" potentials has been read"<< std::endl;} + else{ + if(checkCount > NbPositions){std::cout << "More parts than "<<NbPositions <<" potentials has been read"<< std::endl;} + } + update_tree(); + } + + template<class ContainerClass,class LeafClass,class CellClass> + void generic_set_positions_npart(FOctree<FReal,CellClass,ContainerClass,LeafClass> * octree, + int NbPositions,int* idxOfParticles,FReal * X, FReal * Y , FReal * Z, PartType type){ + int checkCount = 0; + if(type == SOURCE || type==BOTH){ + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * sources = leaf->getSrc(); + const FVector<FSize>& 
indexes = sources->getIndexes(); + FSize nbPartThere = sources->getNbParticles(); + for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + /* only particles listed in idxOfParticles are moved: entry i of X/Y/Z belongs to particle idxOfParticles[i] */ + for(int iterPart = 0 ; iterPart<NbPositions ; ++iterPart){ + if(indexes[idxPart] == idxOfParticles[iterPart]){ + sources->getWPositions()[0][idxPart] = X[iterPart]; + sources->getWPositions()[1][idxPart] = Y[iterPart]; + sources->getWPositions()[2][idxPart] = Z[iterPart]; + checkCount++; + break; + } + } + } + }); + }else{//Targets + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * targets = leaf->getTargets(); + const FVector<FSize>& indexes = targets->getIndexes(); + FSize nbPartThere = targets->getNbParticles(); + for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + /* same lookup as the source branch: X/Y/Z hold NbPositions entries, not one per particle of the tree */ + for(int iterPart = 0 ; iterPart<NbPositions ; ++iterPart){ + if(indexes[idxPart] == idxOfParticles[iterPart]){ + targets->getWPositions()[0][idxPart] = X[iterPart]; + targets->getWPositions()[1][idxPart] = Y[iterPart]; + targets->getWPositions()[2][idxPart] = Z[iterPart]; + checkCount++; + break; + } + } + } + }); + } + if(checkCount < NbPositions){std::cout << "Not all "<<NbPositions <<" potentials has been read"<< std::endl;} + else{ + if(checkCount > NbPositions){std::cout << "More parts than "<<NbPositions <<" potentials has been read"<< std::endl;} + } + update_tree(); + } + + /** Overwrite the position of every particle listed in idxOfParticles: the particle at idxOfParticles[i] takes updatedXYZ[3*i], [3*i+1], [3*i+2]. Writes into the source containers for SOURCE or BOTH, into the target containers otherwise, prints a notice when the number of particles updated differs from NbPositions, then calls update_tree(). */ template<class ContainerClass,class LeafClass,class CellClass> + void generic_set_positions_xyz_npart(FOctree<FReal,CellClass,ContainerClass,LeafClass> * octree, + int NbPositions,int * idxOfParticles,FReal * updatedXYZ, PartType type){ + int checkCount = 0; + if(type == SOURCE || type==BOTH){ + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * sources = leaf->getSrc(); + const FVector<FSize>& indexes = sources->getIndexes(); + FSize nbPartThere = sources->getNbParticles(); + for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + /* updatedXYZ holds 3*NbPositions values ordered like idxOfParticles, so it is indexed by the matching slot */ + for(int iterPart = 0 ; iterPart<NbPositions ; ++iterPart){ + if(indexes[idxPart] == idxOfParticles[iterPart]){ + sources->getWPositions()[0][idxPart] = updatedXYZ[iterPart*3+0]; + sources->getWPositions()[1][idxPart] = updatedXYZ[iterPart*3+1]; + sources->getWPositions()[2][idxPart] = updatedXYZ[iterPart*3+2]; + checkCount++; + break; + } + } + } + }); + }else{//Targets + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * 
targets = leaf->getTargets(); + const FVector<FSize>& indexes = targets->getIndexes(); + FSize nbPartThere = targets->getNbParticles(); + for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + /* same slot lookup as the source branch */ + for(int iterPart = 0 ; iterPart<NbPositions ; ++iterPart){ + if(indexes[idxPart] == idxOfParticles[iterPart]){ + targets->getWPositions()[0][idxPart] = updatedXYZ[iterPart*3+0]; + targets->getWPositions()[1][idxPart] = updatedXYZ[iterPart*3+1]; + targets->getWPositions()[2][idxPart] = updatedXYZ[iterPart*3+2]; + checkCount++; + break; + } + } + } + }); + } + if(checkCount < NbPositions){std::cout << "Not all "<<NbPositions <<" potentials has been read"<< std::endl;} + else{ + if(checkCount > NbPositions){std::cout << "More parts than "<<NbPositions <<" potentials has been read"<< std::endl;} + } + update_tree(); + } + + virtual void set_positions_xyz_npart( int NbPositions, int* idxOfParticles, FReal * updatedXYZ, PartType type){ FAssertLF(0,"No tree instancied, exiting ...\n"); } - virtual void set_positions( int NbPositions, double * X, double * Y , double * Z){ + virtual void set_positions_npart( int NbPositions, int* idxOfParticles, + FReal * X, FReal * Y , FReal * Z, PartType type){ FAssertLF(0,"No tree instancied, exiting ...\n"); } - virtual void set_positions_xyz_npart( int NbPositions, int* idxOfParticles, double * updatedXYZ){ + virtual void set_positions_xyz( int NbPositions, FReal * updatedXYZ, PartType type){ FAssertLF(0,"No tree instancied, exiting ...\n"); } - virtual void set_positions_npart( int NbPositions, int* idxOfParticles, - double * X, double * Y , double * Z){ + virtual void set_positions( int NbPositions, FReal * X, FReal * Y , FReal * Z, PartType type){ FAssertLF(0,"No tree instancied, exiting ...\n"); } + virtual void get_positions_xyz(int NbPositions,FReal * updatedXYZ, PartType type){ + } + virtual void get_positions(int NbPositions,FReal * X,FReal * Y,FReal * Z, PartType type){ + } + virtual void get_positions_xyz_npart(int NbPositions,int * idxOfPart, FReal * updatedXYZ, PartType type){ + } + virtual void get_positions_npart(int 
NbPositions,int * idxOfPart, FReal * X,FReal * Y,FReal * Z, PartType type){ + } + //Function to update the tree virtual void update_tree(){ FAssertLF(0,"No tree instancied, exiting ...\n"); } - - //Function to get the positions - virtual void get_positions_xyz( int NbPositions, double * positionsToFill){ - FAssertLF(0,"No tree instancied, exiting ...\n"); + template<class ContainerClass,class LeafClass,class CellClass> + void generic_get_positions_xyz(FOctree<FReal,CellClass,ContainerClass,LeafClass> * octree, + int NbPositions, FReal * positionsToFill, PartType type){ + int checkCount = 0; + if(type == SOURCE || type==BOTH){ + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * sources = leaf->getSrc(); + const FVector<FSize>& indexes = sources->getIndexes(); + FSize nbPartThere = sources->getNbParticles(); + for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + positionsToFill[indexes[idxPart]*3+0] = sources->getPositions()[0][idxPart]; + positionsToFill[indexes[idxPart]*3+1] = sources->getPositions()[1][idxPart]; + positionsToFill[indexes[idxPart]*3+2] = sources->getPositions()[2][idxPart]; + checkCount++; + } + }); + }else{//Targets + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * targets = leaf->getTargets(); + const FVector<FSize>& indexes = targets->getIndexes(); + FSize nbPartThere = targets->getNbParticles(); + for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + positionsToFill[indexes[idxPart]*3+0] = targets->getPositions()[0][idxPart]; + positionsToFill[indexes[idxPart]*3+1] = targets->getPositions()[1][idxPart]; + positionsToFill[indexes[idxPart]*3+2] = targets->getPositions()[2][idxPart]; + checkCount++; + } + }); + } + if(checkCount < NbPositions){std::cout << "Not all "<<NbPositions <<" potentials has been read"<< std::endl;} + else{ + if(checkCount > NbPositions){std::cout << "More parts than "<<NbPositions <<" potentials has been read"<< std::endl;} + } } - virtual void get_positions( int NbPositions, double * 
X, double * Y , double * Z){ - FAssertLF(0,"No tree instancied, exiting ...\n"); + + /** Fill positionsToFill with the x,y,z of every particle listed in idxOfParticles: the particle at idxOfParticles[i] is written to positionsToFill[3*i], [3*i+1], [3*i+2]. Reads the source containers for SOURCE or BOTH, the target containers otherwise, and prints a notice when the number of matches differs from NbPositions. */ template<class ContainerClass,class LeafClass,class CellClass> + void generic_get_positions_xyz_npart(FOctree<FReal,CellClass,ContainerClass,LeafClass> * octree, + int NbPositions, int * idxOfParticles, FReal * positionsToFill, PartType type){ + int checkCount = 0; + if(type == SOURCE || type==BOTH){ + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * sources = leaf->getSrc(); + const FVector<FSize>& indexes = sources->getIndexes(); + FSize nbPartThere = sources->getNbParticles(); + for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + int iterPart = 0; + bool notFoundYet = true; + while(iterPart < NbPositions && notFoundYet){ + if(indexes[idxPart] == idxOfParticles[iterPart]){ + positionsToFill[iterPart*3+0] = sources->getPositions()[0][idxPart]; /* interleaved xyz: slot iterPart owns three consecutive entries */ + positionsToFill[iterPart*3+1] = sources->getPositions()[1][idxPart]; + positionsToFill[iterPart*3+2] = sources->getPositions()[2][idxPart]; + notFoundYet = false; + checkCount++; + } + else{ + ++iterPart; + } + } + } + }); + }else {//Targets + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * targets = leaf->getTargets(); + const FVector<FSize>& indexes = targets->getIndexes(); + FSize nbPartThere = targets->getNbParticles(); + for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + int iterPart = 0; + bool notFoundYet = true; + while(iterPart < NbPositions && notFoundYet){ + if(indexes[idxPart] == idxOfParticles[iterPart]){ + positionsToFill[iterPart*3+0] = targets->getPositions()[0][idxPart]; /* same interleaved layout as the source branch */ + positionsToFill[iterPart*3+1] = targets->getPositions()[1][idxPart]; + positionsToFill[iterPart*3+2] = targets->getPositions()[2][idxPart]; + notFoundYet = false; + checkCount++; + } + else{ + ++iterPart; + } + } + } + }); + } + if(checkCount < NbPositions){std::cout << "Not all "<<NbPositions <<" potentials has been read"<< std::endl;} + else{ + if(checkCount > NbPositions){std::cout << "More parts than "<<NbPositions <<" potentials 
has been read"<< std::endl;} + } } - virtual void get_positions_xyz_npart( int NbPositions, int* idxOfParticles, double * positionsToFill){ - FAssertLF(0,"No tree instancied, exiting ...\n"); + + template<class ContainerClass,class LeafClass,class CellClass> + void generic_get_positions(FOctree<FReal,CellClass,ContainerClass,LeafClass> * octree, + int NbPositions, FReal * X, FReal * Y , FReal * Z, PartType type){ + int checkCount = 0; + if(type == SOURCE || type==BOTH){ + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * sources = leaf->getSrc(); + const FVector<FSize>& indexes = sources->getIndexes(); + FSize nbPartThere = sources->getNbParticles(); + for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + X[indexes[idxPart]] = sources->getPositions()[0][idxPart]; + Y[indexes[idxPart]] = sources->getPositions()[1][idxPart]; + Z[indexes[idxPart]] = sources->getPositions()[2][idxPart]; + checkCount++; + } + }); + }else{//Targets + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * targets = leaf->getTargets(); + const FVector<FSize>& indexes = targets->getIndexes(); + FSize nbPartThere = targets->getNbParticles(); + for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + X[indexes[idxPart]] = targets->getPositions()[0][idxPart]; + Y[indexes[idxPart]] = targets->getPositions()[1][idxPart]; + Z[indexes[idxPart]] = targets->getPositions()[2][idxPart]; + checkCount++; + } + }); + } + if(checkCount < NbPositions){std::cout << "Not all "<<NbPositions <<" potentials has been read"<< std::endl;} + else{ + if(checkCount > NbPositions){std::cout << "More parts than "<<NbPositions <<" potentials has been read"<< std::endl;} + } } - virtual void get_positions_npart( int NbPositions, int* idxOfParticles, - double * X, double * Y , double * Z){ - FAssertLF(0,"No tree instancied, exiting ...\n"); + + template<class ContainerClass,class LeafClass,class CellClass> + void generic_get_positions_npart(FOctree<FReal,CellClass,ContainerClass,LeafClass> * 
octree, + int NbPositions, int * idxOfParticles,FReal * X, FReal * Y , FReal * Z,PartType type){ + int checkCount = 0; + if(type == SOURCE || type==BOTH){ + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * sources = leaf->getSrc(); + const FVector<FSize>& indexes = sources->getIndexes(); + FSize nbPartThere = sources->getNbParticles(); + for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + int iterPart = 0; + bool notFoundYet = true; + while(iterPart < NbPositions && notFoundYet){ + if(indexes[idxPart] == idxOfParticles[iterPart]){ + X[iterPart] = sources->getPositions()[0][idxPart]; + Y[iterPart] = sources->getPositions()[1][idxPart]; + Z[iterPart] = sources->getPositions()[2][idxPart]; + notFoundYet = false; + checkCount++; + } + else{ + ++iterPart; + } + } + } + }); + }else{//Targets + octree->forEachLeaf([&](LeafClass* leaf){ + ContainerClass * targets = leaf->getTargets(); + const FVector<FSize>& indexes = targets->getIndexes(); + FSize nbPartThere = targets->getNbParticles(); + for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ + int iterPart = 0; + bool notFoundYet = true; + while(iterPart < NbPositions && notFoundYet){ + if(indexes[idxPart] == idxOfParticles[iterPart]){ + X[iterPart] = targets->getPositions()[0][idxPart]; + Y[iterPart] = targets->getPositions()[1][idxPart]; + Z[iterPart] = targets->getPositions()[2][idxPart]; + notFoundYet = false; + checkCount++; + } + else{ + ++iterPart; + } + } + } + }); + } + if(checkCount < NbPositions){std::cout << "Not all "<<NbPositions <<" potentials has been read"<< std::endl;} + else{ + if(checkCount > NbPositions){std::cout << "More parts than "<<NbPositions <<" potentials has been read"<< std::endl;} + } } virtual void reset_tree(Callback_reset_cell cellReset){ } - template<class FReal,class ContainerClass, class CellClass, class LeafClass> + template<class ContainerClass, class CellClass, class LeafClass> void generic_reset_tree(FOctree<FReal,CellClass,ContainerClass,LeafClass> * 
tree){ //Reset forces and potentials tree->forEachLeaf([&](LeafClass * leaf){ @@ -234,7 +797,7 @@ public: FAssertLF(0,"No user kernel defined, exiting ...\n"); } - virtual void execute_fmm(){ + virtual void execute_fmm(){ FAssertLF(0,"No kernel set, cannot execute anything, exiting ...\n"); } @@ -242,213 +805,223 @@ public: FAssertLF(0,"No kernel set, cannot execute anything, exiting ...\n"); } - virtual void hibox_Rinflu_display(FSize nbPartIn, double *Rinflu){ + + virtual void print_everything(){ } + /** + * Monitoring Function, once the FMM has ended, it's possible to + * get the time spent in each operator. Timers receives one elapsed() value per recorded timer; nothing is written while algoTimer is still null. + */ + virtual void get_timers(FReal * Timers){ + const FTic * timers = algoTimer ? algoTimer->getAllTimers() : nullptr; /* algoTimer starts as nullptr in the constructor */ + int nbTimers = algoTimer ? algoTimer->getNbOfTimerRecorded() : 0; + for(int idTimer = 0; idTimer<nbTimers ; ++idTimer){ + Timers[idTimer] = timers[idTimer].elapsed(); + } + } + virtual int get_nb_timers(){ + return algoTimer ? algoTimer->getNbOfTimerRecorded() : 0; /* 0 while algoTimer is still null */ + } }; - +template<class FReal> struct ScalFmmCoreHandle { + struct ScalFmmCoreConfig { // Read/Write parameter int treeHeight; // Number of level in the octree - double boxWidth; // Simulation box size (root level) - FPoint<double> boxCenter; // Center position of the box simulation(FReal[3]) + FReal boxWidth; // Simulation box size (root level) + FPoint<FReal> boxCenter; // Center position of the box simulation(FReal[3]) }; ScalFmmCoreConfig config; - FScalFMMEngine* engine; + FScalFMMEngine<FReal>* engine; }; extern "C" void scalfmm_build_tree(scalfmm_handle Handle,int TreeHeight,double BoxWidth,double* BoxCenter,Scalfmm_Cell_Descriptor user_cell_descriptor){ - ((ScalFmmCoreHandle *) Handle)->engine->build_tree(TreeHeight,BoxWidth, BoxCenter, user_cell_descriptor); + ((ScalFmmCoreHandle<double> *) Handle)->engine->build_tree(TreeHeight,BoxWidth, BoxCenter, user_cell_descriptor); } -extern "C" void scalfmm_tree_insert_particles(scalfmm_handle Handle, int NbPositions, double * arrayX, double * arrayY, double * arrayZ){ - 
((ScalFmmCoreHandle *) Handle)->engine->tree_insert_particles(NbPositions, arrayX, arrayY, arrayZ); +extern "C" void scalfmm_tree_insert_particles(scalfmm_handle Handle, int NbPositions, double * arrayX, double * arrayY, double * arrayZ, + PartType type){ + ((ScalFmmCoreHandle<double> *) Handle)->engine->tree_insert_particles(NbPositions, arrayX, arrayY, arrayZ, type); } -extern "C" void scalfmm_tree_insert_particles_xyz(scalfmm_handle Handle, int NbPositions, double * XYZ){ - ((ScalFmmCoreHandle * ) Handle)->engine->tree_insert_particles_xyz(NbPositions, XYZ); +extern "C" void scalfmm_tree_insert_particles_xyz(scalfmm_handle Handle, int NbPositions, double * XYZ, PartType type){ + ((ScalFmmCoreHandle<double> * ) Handle)->engine->tree_insert_particles_xyz(NbPositions, XYZ,type); } -extern "C" void scalfmm_set_physical_values(scalfmm_handle Handle, int nbPhysicalValues, double * physicalValues){ - ((ScalFmmCoreHandle * ) Handle)->engine->set_physical_values(nbPhysicalValues, physicalValues); +extern "C" void scalfmm_set_physical_values(scalfmm_handle Handle, int nbPhysicalValues, double * physicalValues, PartType type){ + ((ScalFmmCoreHandle<double> * ) Handle)->engine->set_physical_values(nbPhysicalValues, physicalValues, type); } -extern "C" void scalfmm_get_physical_values(scalfmm_handle Handle, int nbPhysicalValues, double * physicalValues){ - ((ScalFmmCoreHandle * ) Handle)->engine->get_physical_values(nbPhysicalValues, physicalValues); +extern "C" void scalfmm_get_physical_values(scalfmm_handle Handle, int nbPhysicalValues, double * physicalValues, PartType type){ + ((ScalFmmCoreHandle<double> * ) Handle)->engine->get_physical_values(nbPhysicalValues, physicalValues, type); } extern "C" void scalfmm_set_physical_values_npart(scalfmm_handle Handle, int nbPhysicalValues, - int* idxOfParticles, double * physicalValues){ - ((ScalFmmCoreHandle * ) Handle)->engine->set_physical_values_npart(nbPhysicalValues, - idxOfParticles, physicalValues); + int* idxOfParticles, 
double * physicalValues, PartType type){ + ((ScalFmmCoreHandle<double> * ) Handle)->engine->set_physical_values_npart(nbPhysicalValues, + idxOfParticles, physicalValues, type); } extern "C" void scalfmm_get_physical_values_npart(scalfmm_handle Handle, int nbPhysicalValues, - int* idxOfParticles, double * physicalValues){ - ((ScalFmmCoreHandle * ) Handle)->engine->get_physical_values_npart(nbPhysicalValues, - idxOfParticles, physicalValues); + int* idxOfParticles, double * physicalValues, PartType type){ + ((ScalFmmCoreHandle<double> * ) Handle)->engine->get_physical_values_npart(nbPhysicalValues, + idxOfParticles, physicalValues, type); } //To get the result -extern "C" void scalfmm_get_forces_xyz(scalfmm_handle Handle, int nbParts, double * forcesToFill){ - ((ScalFmmCoreHandle * ) Handle)->engine->get_forces_xyz(nbParts, forcesToFill); +extern "C" void scalfmm_get_forces_xyz(scalfmm_handle Handle, int nbParts, double * forcesToFill, PartType type){ + ((ScalFmmCoreHandle<double> * ) Handle)->engine->get_forces_xyz(nbParts, forcesToFill, type); } -extern "C" void scalfmm_get_forces_xyz_npart(scalfmm_handle Handle, int nbParts, int* idxOfParticles, double * forcesToFill){ - ((ScalFmmCoreHandle * ) Handle)->engine->get_forces_xyz_npart(nbParts, idxOfParticles, forcesToFill); +extern "C" void scalfmm_get_forces_xyz_npart(scalfmm_handle Handle, int nbParts, int* idxOfParticles, double * forcesToFill, PartType type){ + ((ScalFmmCoreHandle<double> * ) Handle)->engine->get_forces_xyz_npart(nbParts, idxOfParticles, forcesToFill, type); } -extern "C" void scalfmm_get_forces(scalfmm_handle Handle, int nbParts, double * fX, double* fY, double* fZ){ - ((ScalFmmCoreHandle * ) Handle)->engine->get_forces(nbParts,fX, fY, fZ) ; +extern "C" void scalfmm_get_forces(scalfmm_handle Handle, int nbParts, double * fX, double* fY, double* fZ, PartType type){ + ((ScalFmmCoreHandle<double> * ) Handle)->engine->get_forces(nbParts,fX, fY, fZ, type); } -extern "C" void 
scalfmm_get_forces_npart(scalfmm_handle Handle, int nbParts, int* idxOfParticles, double * fX, double* fY, double* fZ){ - ((ScalFmmCoreHandle * ) Handle)->engine->get_forces_npart(nbParts, idxOfParticles, fX, fY, fZ) ; +extern "C" void scalfmm_get_forces_npart(scalfmm_handle Handle, int nbParts, int* idxOfParticles, double * fX, double* fY, double* fZ, PartType type){ + ((ScalFmmCoreHandle<double> * ) Handle)->engine->get_forces_npart(nbParts, idxOfParticles, fX, fY, fZ, type); } //To set iniital condition -extern "C" void scalfmm_set_forces_xyz(scalfmm_handle Handle, int nbParts, double * forcesToFill){ - ((ScalFmmCoreHandle * ) Handle)->engine->set_forces_xyz(nbParts, forcesToFill) ; +extern "C" void scalfmm_set_forces_xyz(scalfmm_handle Handle, int nbParts, double * forcesToFill, PartType type){ + ((ScalFmmCoreHandle<double> * ) Handle)->engine->set_forces_xyz(nbParts, forcesToFill, type); } -extern "C" void scalfmm_set_forces_xyz_npart(scalfmm_handle Handle, int nbParts, int* idxOfParticles, double * forcesToFill){ - ((ScalFmmCoreHandle * ) Handle)->engine->set_forces_xyz_npart(nbParts, idxOfParticles, forcesToFill) ; +extern "C" void scalfmm_set_forces_xyz_npart(scalfmm_handle Handle, int nbParts, int* idxOfParticles, double * forcesToFill, PartType type){ + ((ScalFmmCoreHandle<double> * ) Handle)->engine->set_forces_xyz_npart(nbParts, idxOfParticles, forcesToFill, type); } -extern "C" void scalfmm_set_forces(scalfmm_handle Handle, int nbParts, double * fX, double* fY, double* fZ){ - ((ScalFmmCoreHandle * ) Handle)->engine->set_forces(nbParts, fX, fY, fZ) ; +extern "C" void scalfmm_set_forces(scalfmm_handle Handle, int nbParts, double * fX, double* fY, double* fZ, PartType type){ + ((ScalFmmCoreHandle<double> * ) Handle)->engine->set_forces(nbParts, fX, fY, fZ, type); } -extern "C" void scalfmm_set_forces_npart(scalfmm_handle Handle, int nbParts, int* idxOfParticles, double * fX, double* fY, double* fZ){ - ((ScalFmmCoreHandle * ) 
Handle)->engine->set_forces_npart(nbParts, idxOfParticles, fX, fY, fZ) ; +extern "C" void scalfmm_set_forces_npart(scalfmm_handle Handle, int nbParts, int* idxOfParticles, double * fX, double* fY, double* fZ, PartType type){ + ((ScalFmmCoreHandle<double> * ) Handle)->engine->set_forces_npart(nbParts, idxOfParticles, fX, fY, fZ, type); } //To deal with potential -extern "C" void scalfmm_get_potentials(scalfmm_handle Handle, int nbParts, double * potentialsToFill){ - ((ScalFmmCoreHandle * ) Handle)->engine->get_potentials(nbParts, potentialsToFill) ; +extern "C" void scalfmm_get_potentials(scalfmm_handle Handle, int nbParts, double * potentialsToFill, PartType type){ + ((ScalFmmCoreHandle<double> * ) Handle)->engine->get_potentials(nbParts, potentialsToFill, type); } -extern "C" void scalfmm_set_potentials(scalfmm_handle Handle, int nbParts, double * potentialsToFill){ - ((ScalFmmCoreHandle * ) Handle)->engine->set_potentials(nbParts, potentialsToFill) ; +extern "C" void scalfmm_set_potentials(scalfmm_handle Handle, int nbParts, double * potentialsToFill, PartType type){ + ((ScalFmmCoreHandle<double> * ) Handle)->engine->set_potentials(nbParts, potentialsToFill, type); } -extern "C" void scalfmm_get_potentials_npart(scalfmm_handle Handle, int nbParts, int* idxOfParticles, double * potentialsToFill){ - ((ScalFmmCoreHandle * ) Handle)->engine->get_potentials_npart(nbParts, idxOfParticles, potentialsToFill) ; +extern "C" void scalfmm_get_potentials_npart(scalfmm_handle Handle, int nbParts, int* idxOfParticles, double * potentialsToFill, PartType type){ + ((ScalFmmCoreHandle<double> * ) Handle)->engine->get_potentials_npart(nbParts, idxOfParticles, potentialsToFill, type); } -extern "C" void scalfmm_set_potentials_npart(scalfmm_handle Handle, int nbParts, int* idxOfParticles, double * potentialsToFill){ - ((ScalFmmCoreHandle * ) Handle)->engine->set_potentials_npart(nbParts, idxOfParticles, potentialsToFill) ; +extern "C" void scalfmm_set_potentials_npart(scalfmm_handle 
Handle, int nbParts, int* idxOfParticles, double * potentialsToFill, PartType type){ + ((ScalFmmCoreHandle<double> * ) Handle)->engine->set_potentials_npart(nbParts, idxOfParticles, potentialsToFill, type); } // //To deal with positions // //Out of the box behavior // extern "C" void scalfmm_out_of_the_box_config(scalfmm_handle Handle,scalfmm_out_of_box_behavior config){ -// ((ScalFmmCoreHandle * ) Handle)->engine->out_of_the_box_config(config); +// ((ScalFmmCoreHandle<double> * ) Handle)->engine->out_of_the_box_config(config); // } //Update -extern "C" void scalfmm_add_to_positions_xyz(scalfmm_handle Handle, int NbPositions, double * updatedXYZ){ - ((ScalFmmCoreHandle * ) Handle)->engine->add_to_positions_xyz(NbPositions, updatedXYZ); +extern "C" void scalfmm_add_to_positions_xyz(scalfmm_handle Handle, int NbPositions, double * updatedXYZ, PartType type){ + ((ScalFmmCoreHandle<double> * ) Handle)->engine->add_to_positions_xyz(NbPositions, updatedXYZ, type); } -extern "C" void scalfmm_add_to_positions(scalfmm_handle Handle, int NbPositions, double * X, double * Y , double * Z){ - ((ScalFmmCoreHandle * ) Handle)->engine->add_to_positions(NbPositions, X, Y, Z); +extern "C" void scalfmm_add_to_positions(scalfmm_handle Handle, int NbPositions, double * X, double * Y , double * Z, PartType type){ + ((ScalFmmCoreHandle<double> * ) Handle)->engine->add_to_positions(NbPositions, X, Y, Z, type); } -extern "C" void scalfmm_add_to_positions_xyz_npart(scalfmm_handle Handle, int NbPositions, int* idxOfParticles, double * updatedXYZ){ - ((ScalFmmCoreHandle * ) Handle)->engine->add_to_positions_xyz_npart(NbPositions, idxOfParticles, updatedXYZ); +extern "C" void scalfmm_add_to_positions_xyz_npart(scalfmm_handle Handle, int NbPositions, int* idxOfParticles, double * updatedXYZ, PartType type){ + ((ScalFmmCoreHandle<double> * ) Handle)->engine->add_to_positions_xyz_npart(NbPositions, idxOfParticles, updatedXYZ, type); } extern "C" void scalfmm_add_to_positions_npart(scalfmm_handle 
Handle, int NbPositions, int* idxOfParticles, - double * X, double * Y , double * Z){ - ((ScalFmmCoreHandle * ) Handle)->engine->add_to_positions_npart(NbPositions, idxOfParticles, X, Y, Z); + double * X, double * Y , double * Z, PartType type){ + ((ScalFmmCoreHandle<double> * ) Handle)->engine->add_to_positions_npart(NbPositions, idxOfParticles, X, Y, Z, type); } //Set new positions -extern "C" void scalfmm_set_positions_xyz(scalfmm_handle Handle, int NbPositions, double * updatedXYZ){ - ((ScalFmmCoreHandle * ) Handle)->engine->set_positions_xyz(NbPositions, updatedXYZ); +extern "C" void scalfmm_set_positions_xyz(scalfmm_handle Handle, int NbPositions, double * updatedXYZ, PartType type){ + ((ScalFmmCoreHandle<double> * ) Handle)->engine->set_positions_xyz(NbPositions, updatedXYZ, type); } -extern "C" void scalfmm_set_positions(scalfmm_handle Handle, int NbPositions, double * X, double * Y , double * Z){ - ((ScalFmmCoreHandle * ) Handle)->engine->set_positions(NbPositions, X, Y , Z); +extern "C" void scalfmm_set_positions(scalfmm_handle Handle, int NbPositions, double * X, double * Y , double * Z, PartType type){ + ((ScalFmmCoreHandle<double> * ) Handle)->engine->set_positions(NbPositions, X, Y , Z, type); } -extern "C" void scalfmm_set_positions_xyz_npart(scalfmm_handle Handle, int NbPositions, int* idxOfParticles, double * updatedXYZ){ - ((ScalFmmCoreHandle * ) Handle)->engine->set_positions_xyz_npart(NbPositions, idxOfParticles, updatedXYZ); +extern "C" void scalfmm_set_positions_xyz_npart(scalfmm_handle Handle, int NbPositions, int* idxOfParticles, double * updatedXYZ, PartType type){ + ((ScalFmmCoreHandle<double> * ) Handle)->engine->set_positions_xyz_npart(NbPositions, idxOfParticles, updatedXYZ, type); } extern "C" void scalfmm_set_positions_npart(scalfmm_handle Handle, int NbPositions, int* idxOfParticles, - double * X, double * Y , double * Z){ - ((ScalFmmCoreHandle * ) Handle)->engine->set_positions_npart(NbPositions, idxOfParticles, X, Y, Z); + double * 
X, double * Y , double * Z, PartType type){ + ((ScalFmmCoreHandle<double> * ) Handle)->engine->set_positions_npart(NbPositions, idxOfParticles, X, Y, Z, type); } extern "C" void scalfmm_update_tree(scalfmm_handle Handle){ - ((ScalFmmCoreHandle * ) Handle)->engine->update_tree(); + ((ScalFmmCoreHandle<double> * ) Handle)->engine->update_tree(); } //Get back positions -extern "C" void scalfmm_get_positions_xyz(scalfmm_handle Handle, int NbPositions, double * updatedXYZ){ - ((ScalFmmCoreHandle * ) Handle)->engine->get_positions_xyz(NbPositions, updatedXYZ); +extern "C" void scalfmm_get_positions_xyz(scalfmm_handle Handle, int NbPositions, double * updatedXYZ, PartType type){ + ((ScalFmmCoreHandle<double> * ) Handle)->engine->get_positions_xyz(NbPositions, updatedXYZ, type); } -extern "C" void scalfmm_get_positions(scalfmm_handle Handle, int NbPositions, double * X, double * Y , double * Z){ - ((ScalFmmCoreHandle * ) Handle)->engine->get_positions(NbPositions, X, Y , Z); +extern "C" void scalfmm_get_positions(scalfmm_handle Handle, int NbPositions, double * X, double * Y , double * Z, PartType type){ + ((ScalFmmCoreHandle<double> * ) Handle)->engine->get_positions(NbPositions, X, Y , Z, type); } -extern "C" void scalfmm_get_positions_xyz_npart(scalfmm_handle Handle, int NbPositions, int* idxOfParticles, double * updatedXYZ){ - ((ScalFmmCoreHandle * ) Handle)->engine->get_positions_xyz_npart(NbPositions, idxOfParticles, updatedXYZ); +extern "C" void scalfmm_get_positions_xyz_npart(scalfmm_handle Handle, int NbPositions, int* idxOfParticles, double * updatedXYZ, PartType type){ + ((ScalFmmCoreHandle<double> * ) Handle)->engine->get_positions_xyz_npart(NbPositions, idxOfParticles, updatedXYZ, type); } extern "C" void scalfmm_get_positions_npart(scalfmm_handle Handle, int NbPositions, int* idxOfParticles, - double * X, double * Y , double * Z){ - ((ScalFmmCoreHandle * ) Handle)->engine->get_positions_npart(NbPositions, idxOfParticles, X, Y, Z); + double * X, double * Y , 
double * Z, PartType type){ + ((ScalFmmCoreHandle<double> * ) Handle)->engine->get_positions_npart(NbPositions, idxOfParticles, X, Y, Z, type); } //To choose algorithm extern "C" void scalfmm_algorithm_config(scalfmm_handle Handle, scalfmm_algorithm config){ - ((ScalFmmCoreHandle * ) Handle)->engine->algorithm_config(config); + ((ScalFmmCoreHandle<double> * ) Handle)->engine->algorithm_config(config); } //Executing FMM extern "C" void scalfmm_execute_fmm(scalfmm_handle Handle){ - ((ScalFmmCoreHandle * ) Handle)->engine->execute_fmm(); + ((ScalFmmCoreHandle<double> * ) Handle)->engine->execute_fmm(); } extern "C" void scalfmm_user_kernel_config(scalfmm_handle Handle, Scalfmm_Kernel_Descriptor userKernel, void * userDatas){ - ((ScalFmmCoreHandle * ) Handle)->engine->user_kernel_config(userKernel,userDatas); + ((ScalFmmCoreHandle<double> * ) Handle)->engine->user_kernel_config(userKernel,userDatas); } -/** - * These functions are just translating functions. - */ - -//< This function fill the childFullPosition[3] with [-1;1] to know the position of a child relatively to -//< its position from its parent -extern "C" void scalfmm_utils_parentChildPosition(int childPosition, int* childFullPosition){ - childFullPosition[2] = (childPosition%2 ? 1 : -1); - childFullPosition[1] = ((childPosition/2)%2 ? 1 : -1); - childFullPosition[0] = ((childPosition/4)%2 ? 
1 : -1); +//Monitoring functions +extern "C" void scalfmm_get_timers(scalfmm_handle Handle, double * Timers){ + ((ScalFmmCoreHandle<double> * ) Handle)->engine->get_timers(Timers); } -//< This function fill the childFullPosition[3] with [-3;3] to know the position of a interaction -//< cell relatively to its position from the target -extern "C" void scalfmm_utils_interactionPosition(int interactionPosition, int* srcPosition){ - srcPosition[2] = interactionPosition%7 - 3; - srcPosition[1] = (interactionPosition/7)%7 - 3; - srcPosition[0] = (interactionPosition/49)%7 - 3; +extern "C" int scalfmm_get_nb_timers(scalfmm_handle Handle){ + return ((ScalFmmCoreHandle<double> * ) Handle)->engine->get_nb_timers(); } +// extern "C" void scalfmm_tree_abstract_insert(scalfmm_handle Handle, int NbPartToInsert, int nbAttributeToInsert, int * strideForEachAtt, +// double* rawDatas){ +// ((ScalFmmCoreHandle<double> * ) Handle)->engine->tree_abstract_insert(NbPartToInsert,nbAttributeToInsert,strideForEachAtt,rawDatas); +// } extern "C" void scalfmm_reset_tree(scalfmm_handle Handle, Callback_reset_cell cellReseter){ - ((ScalFmmCoreHandle * ) Handle)->engine->reset_tree(cellReseter); + ((ScalFmmCoreHandle<double> * ) Handle)->engine->reset_tree(cellReseter); } -extern "C" void scalfmm_hibox_Rinflu_display(scalfmm_handle Handle, FSize nbPart, double * Rinflu){ - ((ScalFmmCoreHandle * ) Handle)->engine->hibox_Rinflu_display(nbPart, Rinflu); +extern "C" void scalfmm_print_everything(scalfmm_handle Handle){ + ((ScalFmmCoreHandle<double> * ) Handle)->engine->print_everything(); } #endif diff --git a/Addons/CKernelApi/Src/FScalfmmApiInit.cpp b/Addons/CKernelApi/Src/FScalfmmApiInit.cpp index c2181e59a82ed35342193df8c468294d5c9a9e38..19cfb08b58f2af3efe39360366477cc04ed290cf 100644 --- a/Addons/CKernelApi/Src/FScalfmmApiInit.cpp +++ b/Addons/CKernelApi/Src/FScalfmmApiInit.cpp @@ -6,48 +6,94 @@ extern "C" { #include "FInterEngine.hpp" #include "FUserKernelEngine.hpp" -extern "C" scalfmm_handle 
scalfmm_init(/*int TreeHeight,double BoxWidth,double* BoxCenter, */scalfmm_kernel_type KernelType){ - ScalFmmCoreHandle * handle = new ScalFmmCoreHandle(); +extern "C" scalfmm_handle scalfmm_init(/*int TreeHeight,double BoxWidth,double* BoxCenter, */scalfmm_kernel_type KernelType, + scalfmm_algorithm algo){ + ScalFmmCoreHandle<double> * handle = new ScalFmmCoreHandle<double>(); typedef double FReal; - switch(KernelType){ - case 0: - handle->engine = new FUserKernelEngine<FReal>(/*TreeHeight, BoxWidth, BoxCenter, */KernelType); - break; + if(algo == source_target){ - case 1: - //TODO typedefs - typedef FP2PParticleContainerIndexed<FReal> ContainerClass; - typedef FChebCell<FReal,7> ChebCell; + switch(KernelType){ + case 0: + typedef FP2PParticleContainerIndexed<FReal> ContainerClass; + typedef FTypedLeaf<FReal,ContainerClass> LeafClass; - typedef FInterpMatrixKernelR<FReal> MatrixKernelClass; - typedef FChebSymKernel<FReal,ChebCell,ContainerClass,MatrixKernelClass,7> ChebKernel; + handle->engine = new FUserKernelEngine<FReal,LeafClass>(/*TreeHeight, BoxWidth, BoxCenter, */KernelType); + break; - handle->engine = new FInterEngine<FReal,ChebCell,ChebKernel>(/*TreeHeight,BoxWidth,BoxCenter, */KernelType); - break; - // case 2: - // //TODO typedefs - // typedef FP2PParticleContainerIndexed<FReal> ContainerClass; - // typedef FUnifCell<7> UnifCell; + case 1: + //TODO typedefs + typedef FP2PParticleContainerIndexed<FReal> ContainerClass; + typedef FTypedChebCell<FReal,7> ChebCell; + typedef FTypedLeaf<FReal,ContainerClass> LeafClass; - // typedef FInterpMatrixKernelR<FReal> MatrixKernelClass; - // typedef FUnifKernel<UnifCell,ContainerClass,MatrixKernelClass,7> UnifKernel; + typedef FInterpMatrixKernelR<FReal> MatrixKernelClass; + typedef FChebSymKernel<FReal,ChebCell,ContainerClass,MatrixKernelClass,7> ChebKernel; - // handle->engine = new FInterEngine<UnifCell,UnifKernel>(/*TreeHeight,BoxWidth,BoxCenter, */KernelType); - // break; + handle->engine = new 
FInterEngine<FReal,ChebCell,ChebKernel,LeafClass>(/*TreeHeight,BoxWidth,BoxCenter, */KernelType); + break; + // case 2: + // //TODO typedefs + // typedef FP2PParticleContainerIndexed<FReal> ContainerClass; + // typedef FUnifCell<7> UnifCell; - default: - std::cout<< "Kernel type unsupported" << std::endl; - exit(0); - break; + // typedef FInterpMatrixKernelR<FReal> MatrixKernelClass; + // typedef FUnifKernel<UnifCell,ContainerClass,MatrixKernelClass,7> UnifKernel; + + // handle->engine = new FInterEngine<UnifCell,UnifKernel>(/*TreeHeight,BoxWidth,BoxCenter, */KernelType); + // break; + + default: + std::cout<< "Kernel type unsupported" << std::endl; + exit(0); + break; + } + } + else{ //No Source/Targets distinction + switch(KernelType){ + case 0: + typedef FP2PParticleContainerIndexed<FReal> ContainerClass; + typedef FSimpleLeaf<FReal,ContainerClass> LeafClass; + + handle->engine = new FUserKernelEngine<FReal,LeafClass>(/*TreeHeight, BoxWidth, BoxCenter, */KernelType); + break; + + case 1: + //TODO typedefs + typedef FP2PParticleContainerIndexed<FReal> ContainerClass; + typedef FChebCell<FReal,7> ChebCell; + typedef FSimpleLeaf<FReal,ContainerClass> LeafClass; + + typedef FInterpMatrixKernelR<FReal> MatrixKernelClass; + typedef FChebSymKernel<FReal,ChebCell,ContainerClass,MatrixKernelClass,7> ChebKernel; + + handle->engine = new FInterEngine<FReal,ChebCell,ChebKernel,LeafClass>(/*TreeHeight,BoxWidth,BoxCenter, */KernelType); + break; + // case 2: + // //TODO typedefs + // typedef FP2PParticleContainerIndexed<FReal> ContainerClass; + // typedef FUnifCell<7> UnifCell; + + // typedef FInterpMatrixKernelR<FReal> MatrixKernelClass; + // typedef FUnifKernel<UnifCell,ContainerClass,MatrixKernelClass,7> UnifKernel; + + // handle->engine = new FInterEngine<UnifCell,UnifKernel>(/*TreeHeight,BoxWidth,BoxCenter, */KernelType); + // break; + + default: + std::cout<< "Kernel type unsupported" << std::endl; + exit(0); + break; + + } } - return handle; + return handle; } extern 
"C" void scalfmm_dealloc_handle(scalfmm_handle handle, Callback_free_cell userDeallocator){ - ((ScalFmmCoreHandle *) handle)->engine->intern_dealloc_handle(userDeallocator); - delete ((ScalFmmCoreHandle *) handle)->engine ; - delete (ScalFmmCoreHandle *) handle; + ((ScalFmmCoreHandle<double> *) handle)->engine->intern_dealloc_handle(userDeallocator); + delete ((ScalFmmCoreHandle<double> *) handle)->engine ; + delete (ScalFmmCoreHandle<double> *) handle; } /** diff --git a/Addons/CKernelApi/Src/FUserKernelEngine.hpp b/Addons/CKernelApi/Src/FUserKernelEngine.hpp index 10e3f5a64c56b3df9516d5553080d8e22cbddf69..834ee6a40bea408330a8a0ca2b9417180f1cb058 100644 --- a/Addons/CKernelApi/Src/FUserKernelEngine.hpp +++ b/Addons/CKernelApi/Src/FUserKernelEngine.hpp @@ -27,7 +27,7 @@ /** * @brief CoreCell : Cell used to store User datas */ -class CoreCell : public FBasicCell { +class CoreCell : public FBasicCell, public FExtendCellType { // Mutable in order to work with the API mutable void* userData; @@ -87,7 +87,6 @@ Scalfmm_Cell_Descriptor CoreCell::user_cell_descriptor; */ template< class CellClass, class ContainerClass> class CoreKernel : public FAbstractKernels<CellClass,ContainerClass> { - Scalfmm_Kernel_Descriptor kernel; void* userData; @@ -196,48 +195,50 @@ public: ContainerClass* const [27], const int ){ } + //Getter + void * getUserKernelDatas(){ + return userData; + } + }; -template<class FReal> -class FUserKernelEngine : public FScalFMMEngine{ +template<class FReal,class LeafClass> +class FUserKernelEngine : public FScalFMMEngine<FReal>{ private: + //Typedefs + typedef FP2PParticleContainerIndexed<FReal> ContainerClass; //Typedefs : - typedef FP2PParticleContainerIndexed<FReal> ContainerClass; - typedef FSimpleLeaf<FReal, ContainerClass> LeafClass; - typedef FOctree<FReal,CoreCell,ContainerClass,LeafClass> OctreeClass; - typedef CoreKernel<CoreCell,ContainerClass> CoreKernelClass; + typedef FOctree<FReal,CoreCell,ContainerClass,LeafClass> OctreeClass; - //For 
arranger classes - typedef FBasicParticleContainerIndexedMover<FReal, OctreeClass, ContainerClass> MoverClass; - typedef FOctreeArranger<FReal,OctreeClass, ContainerClass, MoverClass> ArrangerClass; - typedef FArrangerPeriodic<FReal,OctreeClass, ContainerClass, MoverClass> ArrangerClassPeriodic; + typedef CoreKernel<CoreCell,ContainerClass> CoreKernelClass; + //For arranger classes //Attributes OctreeClass * octree; CoreKernelClass * kernel; - ArrangerClass * arranger; + + // ArrangerClass * arranger; + // ArrangerClassTyped * arrangerTyped; + // ArrangerClassPeriodic * arrangerPeriodic; public: FUserKernelEngine(/*int TreeHeight, double BoxWidth , double * BoxCenter, */scalfmm_kernel_type KernelType) : - octree(nullptr), kernel(nullptr), arranger(nullptr){ - // octree = new OctreeClass(TreeHeight,FMath::Min(3,TreeHeight-1),BoxWidth,FPoint<FReal>(BoxCenter)); - kernelType = KernelType; - //Kernel is not set now because the user must provide a - //Scalfmm_Kernel_descriptor + octree(nullptr), kernel(nullptr) /*,arranger(nullptr)*/ { + FScalFMMEngine<FReal>::kernelType = KernelType; } ~FUserKernelEngine(){ delete octree; octree=nullptr; - if(arranger){ - delete arranger; - arranger=nullptr; - } + // if(arranger){ + // delete arranger; + // arranger=nullptr; + // } if(kernel){ delete kernel; kernel=nullptr; @@ -255,181 +256,154 @@ public: this->octree = new OctreeClass(TreeHeight,FMath::Min(3,TreeHeight-1),BoxWidth,FPoint<FReal>(BoxCenter)); } - void tree_insert_particles( int NbPositions, double * arrayX, double * arrayY, double * arrayZ){ - for(int idPart = 0; idPart<NbPositions ; ++idPart){ - octree->insert(FPoint<FReal>(arrayX[idPart],arrayY[idPart],arrayZ[idPart]),idPart); + void reset_tree(Callback_reset_cell cellReset){ + double boxwidth = octree->getBoxWidth(); + FPoint<FReal> BoxCenter = octree->getBoxCenter(); + double boxCorner[3]; + boxCorner[0] = BoxCenter.getX() - boxwidth/2.0; + boxCorner[1] = BoxCenter.getY() - boxwidth/2.0; + boxCorner[2] = 
BoxCenter.getZ() - boxwidth/2.0; + //apply user function reset on each user's cell + octree->forEachCellWithLevel([&](CoreCell * currCell,const int currLevel){ + if(currCell->getContainer()){ + FTreeCoordinate currCoord = currCell->getCoordinate(); + int arrayCoord[3] = {currCoord.getX(),currCoord.getY(),currCoord.getZ()}; + MortonIndex currMorton = currCoord.getMortonIndex(currLevel); + double position[3]; + position[0] = boxCorner[0] + currCoord.getX()*boxwidth/double(1<<currLevel); + position[1] = boxCorner[1] + currCoord.getY()*boxwidth/double(1<<currLevel); + position[2] = boxCorner[2] + currCoord.getZ()*boxwidth/double(1<<currLevel); + cellReset(currLevel,currMorton,arrayCoord,position,currCell->getContainer()); + } + }); + } + + + void tree_insert_particles( int NbPositions, double * X, double * Y, double * Z, PartType type){ + if(type == BOTH){ + for(FSize idPart = 0; idPart<NbPositions ; ++idPart){ + octree->insert(FPoint<FReal>(X[idPart],Y[idPart],Z[idPart]),idPart); + } + FScalFMMEngine<FReal>::nbPart += NbPositions; + }else{ + if(type==SOURCE){ + for(FSize idPart = 0; idPart<NbPositions ; ++idPart){ + octree->insert(FPoint<FReal>(X[idPart],Y[idPart],Z[idPart]),FParticleTypeSource,idPart); + } + FScalFMMEngine<FReal>::nbPart += NbPositions; + }else{ + for(FSize idPart = 0; idPart<NbPositions ; ++idPart){ + octree->insert(FPoint<FReal>(X[idPart],Y[idPart],Z[idPart]),FParticleTypeTarget,idPart); + } + FScalFMMEngine<FReal>::nbPart += NbPositions; + } } - nbPart += NbPositions; + this->init_cell(); } - void tree_insert_particles_xyz( int NbPositions, double * XYZ){ - for(int idPart = 0; idPart<NbPositions ; ++idPart){ - octree->insert(FPoint<FReal>(&XYZ[3*idPart]),idPart); + void tree_insert_particles_xyz( int NbPositions, double * XYZ, PartType type){ + if(type == BOTH){ + for(FSize idPart = 0; idPart<NbPositions ; ++idPart){ + octree->insert(FPoint<FReal>(&XYZ[3*idPart]),idPart); + } + FScalFMMEngine<FReal>::nbPart += NbPositions; + }else{ + 
if(type==SOURCE){ + for(FSize idPart = 0; idPart<NbPositions ; ++idPart){ + octree->insert(FPoint<FReal>(&XYZ[3*idPart]),FParticleTypeSource,idPart); + } + FScalFMMEngine<FReal>::nbPart += NbPositions; + }else{ + for(FSize idPart = 0; idPart<NbPositions ; ++idPart){ + octree->insert(FPoint<FReal>(&XYZ[3*idPart]),FParticleTypeTarget,idPart); + } + FScalFMMEngine<FReal>::nbPart += NbPositions; + } } - nbPart += NbPositions; this->init_cell(); } /** * To retrieve the positions, in order to move the parts */ - void get_positions_xyz(int NbPositions, double * positionsToFill){ - octree->forEachLeaf([&](LeafClass* leaf){ - ContainerClass * sources = leaf->getSrc(); - const FVector<FSize>& indexes = leaf->getTargets()->getIndexes(); - FSize nbPartThere = sources->getNbParticles(); - for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ - positionsToFill[indexes[idxPart]*3+0] = sources->getPositions()[0][idxPart]; - positionsToFill[indexes[idxPart]*3+1] = sources->getPositions()[1][idxPart]; - positionsToFill[indexes[idxPart]*3+2] = sources->getPositions()[2][idxPart]; - } - }); + void get_positions_xyz(int NbPositions, double * positionsToFill, PartType type){ + FScalFMMEngine<FReal>::template generic_get_positions_xyz<ContainerClass,LeafClass,CoreCell>(octree,NbPositions,positionsToFill,type); } - void get_positions_xyz_npart(int NbPositions, int * idxOfParticles, double * positionsToFill){ - octree->forEachLeaf([&](LeafClass* leaf){ - ContainerClass * sources = leaf->getSrc(); - const FVector<FSize>& indexes = leaf->getTargets()->getIndexes(); - FSize nbPartThere = sources->getNbParticles(); - for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ - int iterPart = 0; - bool notFoundYet = true; - while(iterPart < NbPositions && notFoundYet){ - if(indexes[idxPart] == idxOfParticles[iterPart]){ - positionsToFill[indexes[idxPart]*3+0] = sources->getPositions()[0][idxPart]; - positionsToFill[indexes[idxPart]*3+1] = sources->getPositions()[1][idxPart]; - 
positionsToFill[indexes[idxPart]*3+2] = sources->getPositions()[2][idxPart]; - notFoundYet = false; - } - else{ - ++iterPart; - } - } - } - }); + void get_positions_xyz_npart(int NbPositions, int * idxOfParticles, double * positionsToFill,PartType type){ + FScalFMMEngine<FReal>::template generic_get_positions_xyz_npart<ContainerClass,LeafClass,CoreCell>(octree,NbPositions,idxOfParticles,positionsToFill,type); } - void get_positions( int NbPositions, double * X, double * Y , double * Z){ - octree->forEachLeaf([&](LeafClass* leaf){ - ContainerClass * sources = leaf->getSrc(); - const FVector<FSize>& indexes = leaf->getTargets()->getIndexes(); - FSize nbPartThere = sources->getNbParticles(); - for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ - X[indexes[idxPart]] = sources->getPositions()[0][idxPart]; - Y[indexes[idxPart]] = sources->getPositions()[1][idxPart]; - Z[indexes[idxPart]] = sources->getPositions()[2][idxPart]; - } - }); + void get_positions( int NbPositions, double *X, double *Y , double *Z, PartType type){ + FScalFMMEngine<FReal>::template generic_get_positions<ContainerClass,LeafClass,CoreCell>(octree,NbPositions,X,Y,Z,type); } - void get_positions_npart(int NbPositions, int * idxOfParticles,double * X, double * Y , double * Z){ - octree->forEachLeaf([&](LeafClass* leaf){ - ContainerClass * sources = leaf->getSrc(); - const FVector<FSize>& indexes = leaf->getTargets()->getIndexes(); - FSize nbPartThere = sources->getNbParticles(); - for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ - int iterPart = 0; - bool notFoundYet = true; - while(iterPart < NbPositions && notFoundYet){ - if(indexes[idxPart] == idxOfParticles[iterPart]){ - X[indexes[idxPart]] = sources->getPositions()[0][idxPart]; - Y[indexes[idxPart]] = sources->getPositions()[1][idxPart]; - Z[indexes[idxPart]] = sources->getPositions()[2][idxPart]; - notFoundYet = false; - } - else{ - ++iterPart; - } - } - } - }); + void get_positions_npart(int NbPositions, int * 
idxOfParticles,double * X, double * Y , double * Z,PartType type){ + FScalFMMEngine<FReal>::template generic_get_positions_npart<ContainerClass,LeafClass,CoreCell>(octree,NbPositions,idxOfParticles,X,Y,Z,type); } //Arranger parts : following function provide a way to move parts //inside the tree - void add_to_positions_xyz(int NbPositions,double * updatedXYZ){ - octree->forEachLeaf([&](LeafClass* leaf){ - ContainerClass * sources = leaf->getSrc(); - const FVector<FSize>& indexes = leaf->getTargets()->getIndexes(); - FSize nbPartThere = sources->getNbParticles(); - for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ - sources->getWPositions()[0][idxPart] += updatedXYZ[indexes[idxPart]*3+0]; - sources->getWPositions()[1][idxPart] += updatedXYZ[indexes[idxPart]*3+1]; - sources->getWPositions()[2][idxPart] += updatedXYZ[indexes[idxPart]*3+2]; - } - }); + void add_to_positions_xyz(int NbPositions,double * updatedXYZ,PartType type){ + FScalFMMEngine<FReal>::template generic_add_to_positions_xyz<ContainerClass,LeafClass,CoreCell>(octree,NbPositions,updatedXYZ,type); } - void add_to_positions(int NbPositions,double * X, double * Y , double * Z){ - octree->forEachLeaf([&](LeafClass* leaf){ - ContainerClass * sources = leaf->getSrc(); - const FVector<FSize>& indexes = leaf->getTargets()->getIndexes(); - FSize nbPartThere = sources->getNbParticles(); - for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ - sources->getWPositions()[0][idxPart] += X[indexes[idxPart]]; - sources->getWPositions()[1][idxPart] += Y[indexes[idxPart]]; - sources->getWPositions()[2][idxPart] += Z[indexes[idxPart]]; - } - }); + void add_to_positions(int NbPositions,double * X, double * Y , double * Z,PartType type){ + FScalFMMEngine<FReal>::template generic_add_to_positions<ContainerClass,LeafClass,CoreCell>(octree,NbPositions,X,Y,Z,type); } + void set_positions_xyz(int NbPositions, FReal * updatedXYZ, PartType type){ + FScalFMMEngine<FReal>::template 
generic_set_positions_xyz<ContainerClass,LeafClass,CoreCell>(octree,NbPositions,updatedXYZ,type); + } - void set_positions_xyz(int NbPositions, double * updatedXYZ){ - octree->forEachLeaf([&](LeafClass* leaf){ - ContainerClass * sources = leaf->getSrc(); - const FVector<FSize>& indexes = leaf->getTargets()->getIndexes(); - FSize nbPartThere = sources->getNbParticles(); - for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ - sources->getWPositions()[0][idxPart] = updatedXYZ[indexes[idxPart]*3+0]; - sources->getWPositions()[1][idxPart] = updatedXYZ[indexes[idxPart]*3+1]; - sources->getWPositions()[2][idxPart] = updatedXYZ[indexes[idxPart]*3+2]; - } - }); + void set_positions(int NbPositions, FReal * X,FReal * Y,FReal * Z, PartType type){ + FScalFMMEngine<FReal>::template generic_set_positions<ContainerClass,LeafClass,CoreCell>(octree,NbPositions,X,Y,Z,type); } - void set_positions(int NbPositions, double * X, double * Y, double * Z){ - octree->forEachLeaf([&](LeafClass* leaf){ - ContainerClass * sources = leaf->getSrc(); - const FVector<FSize>& indexes = leaf->getTargets()->getIndexes(); - FSize nbPartThere = sources->getNbParticles(); - for(FSize idxPart = 0 ; idxPart<nbPartThere ; ++idxPart){ - sources->getWPositions()[0][idxPart] = X[indexes[idxPart]]; - sources->getWPositions()[1][idxPart] = Y[indexes[idxPart]]; - sources->getWPositions()[2][idxPart] = Z[indexes[idxPart]]; - } - }); + void set_positions_xyz_npart(int NbPositions, int* idxOfParticles, FReal * updatedXYZ, PartType type){ + FScalFMMEngine<FReal>::template generic_set_positions_xyz_npart<ContainerClass,LeafClass,CoreCell>(octree,NbPositions,idxOfParticles,updatedXYZ,type); + } + void set_positions_npart(int NbPositions, int* idxOfParticles, FReal * X, FReal * Y , FReal * Z, PartType type){ + FScalFMMEngine<FReal>::template generic_set_positions_npart<ContainerClass,LeafClass,CoreCell>(octree,NbPositions,idxOfParticles,X,Y,Z,type); } - void update_tree(){ - if(arranger){ - arranger->rearrange(); 
- //then, we need to re-allocate cells user data for the - //cells created during the process and free user datas for - //the cells removed during the process - init_cell(); - } - else{ - if(Algorithm == 2){ //case in wich the periodic algorithm is used - arranger = new ArrangerClassPeriodic(octree); - arranger->rearrange(); - } - else{ - arranger = new ArrangerClass(octree); - arranger->rearrange(); - init_cell(); - } - } - } + // void update_tree(){ + // if(arranger){ + // arranger->rearrange(); + // //then, we need to re-allocate cells user data for the + // //cells created during the process and free user datas for + // //the cells removed during the process + // init_cell(); + // } + // else{ + // if(FScalFMMEngine<FReal>::Algorithm == 2){ //case in wich the periodic algorithm is used + // arranger = new ArrangerClassPeriodic(octree); + // arranger->rearrange(); + // init_cell(); + // } + // else{ + // arranger = new ArrangerClass(octree); + // arranger->rearrange(); + // init_cell(); + // } + // } + // } /* * Call the user allocator on userDatas member field of each cell */ void init_cell(){ - + void * generic_ptr = nullptr; + if(kernel){ + generic_ptr = kernel->getUserKernelDatas(); + } + else{ + std::cout <<"Warning, no user kernel data set, need to call kernel config first"<< std::endl; + } double boxwidth = octree->getBoxWidth(); FPoint<FReal> BoxCenter = octree->getBoxCenter(); double boxCorner[3]; @@ -446,7 +420,7 @@ public: position[0] = boxCorner[0] + currCoord.getX()*boxwidth/double(1<<currLevel); position[1] = boxCorner[1] + currCoord.getY()*boxwidth/double(1<<currLevel); position[2] = boxCorner[2] + currCoord.getZ()*boxwidth/double(1<<currLevel); - currCell->setContainer(CoreCell::GetInit()(currLevel,currMorton,arrayCoord,position)); + currCell->setContainer(CoreCell::GetInit()(currLevel,currMorton,arrayCoord,position,generic_ptr)); } }); } @@ -463,19 +437,21 @@ public: void execute_fmm(){ FAssertLF(kernel,"No kernel set, please use 
scalfmm_user_kernel_config before calling the execute routine ... Exiting \n"); - switch(Algorithm){ + switch(FScalFMMEngine<FReal>::Algorithm){ case 0: { typedef FFmmAlgorithm<OctreeClass,CoreCell,ContainerClass,CoreKernelClass,LeafClass> AlgoClassSeq; - AlgoClassSeq algoSeq(octree,kernel); - algoSeq.execute(); + AlgoClassSeq * algoSeq = new AlgoClassSeq(octree,kernel); + algoSeq->execute(); + FScalFMMEngine<FReal>::algoTimer = algoSeq; break; } case 1: { typedef FFmmAlgorithmThread<OctreeClass,CoreCell,ContainerClass,CoreKernelClass,LeafClass> AlgoClassThread; - AlgoClassThread algoThread(octree,kernel); - algoThread.execute(); + AlgoClassThread* algoThread = new AlgoClassThread(octree,kernel); + algoThread->execute(); + FScalFMMEngine<FReal>::algoTimer = algoThread; break; } case 2: @@ -486,34 +462,20 @@ public: algoPeriod.execute(); break; } + case 3: + { + // typedef FFmmAlgorithmThreadTsm<OctreeClass,CoreCell,ContainerClass,CoreKernelClass,LeafClass> AlgoClassTargetSource; + // AlgoClassTargetSource* algoTS = new AlgoClassTargetSource(octree,kernel); + // algoTS->execute(); + // FScalFMMEngine<FReal>::algoTimer = algoTS; + // break; + } default : - std::cout<< "No algorithm found (probably for strange reasons) : "<< Algorithm <<" exiting" << std::endl; + std::cout<< "No algorithm found (probably for strange reasons) : "<< FScalFMMEngine<FReal>::Algorithm <<" exiting" << std::endl; } } - void reset_tree(Callback_reset_cell cellReset){ - double boxwidth = octree->getBoxWidth(); - FPoint<FReal> BoxCenter = octree->getBoxCenter(); - double boxCorner[3]; - boxCorner[0] = BoxCenter.getX() - boxwidth/2.0; - boxCorner[1] = BoxCenter.getY() - boxwidth/2.0; - boxCorner[2] = BoxCenter.getZ() - boxwidth/2.0; - //apply user function reset on each user's cell - octree->forEachCellWithLevel([&](CoreCell * currCell,const int currLevel){ - if(currCell->getContainer()){ - FTreeCoordinate currCoord = currCell->getCoordinate(); - int arrayCoord[3] = 
{currCoord.getX(),currCoord.getY(),currCoord.getZ()}; - MortonIndex currMorton = currCoord.getMortonIndex(currLevel); - double position[3]; - position[0] = boxCorner[0] + currCoord.getX()*boxwidth/double(1<<currLevel); - position[1] = boxCorner[1] + currCoord.getY()*boxwidth/double(1<<currLevel); - position[2] = boxCorner[2] + currCoord.getZ()*boxwidth/double(1<<currLevel); - cellReset(currLevel,currMorton,arrayCoord,position,currCell->getContainer()); - } - }); - } - void intern_dealloc_handle(Callback_free_cell userDeallocator){ free_cell(userDeallocator); } diff --git a/Addons/CKernelApi/Tests/testChebInterface.c b/Addons/CKernelApi/Tests/testChebInterface.c index e1ba20606bfd2f479709d9fbe566a97a95ab6ee1..0026a20a4407fdd6f2531b968c8dad6c7c660811 100644 --- a/Addons/CKernelApi/Tests/testChebInterface.c +++ b/Addons/CKernelApi/Tests/testChebInterface.c @@ -1,7 +1,7 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> - +#include <math.h> //For timing monitoring #include <time.h> @@ -20,7 +20,7 @@ /** * @brief Wrapper to init internal ChebCell */ -void* cheb_init_cell(int level, long long morton_index, int* tree_position, double* spatial_position){ +void* cheb_init_cell(int level, long long morton_index, int* tree_position, double* spatial_position, void * KernelDatas){ return ChebCellStruct_create(morton_index,tree_position); } @@ -55,6 +55,10 @@ void cheb_p2pFull(FSize nbParticles, const FSize* particleIndexes, ChebKernel_P2P(nbParticles, particleIndexes, sourceParticleIndexes, sourceNbPart, userData); } +void cheb_resetCell(int level, long long morton_index, int* tree_position, double* spatial_position, void * userCell){ + ChebCell_reset(level,morton_index,tree_position,spatial_position,userCell); +} + /** * @brief Wrapper on timeval struct @@ -158,10 +162,11 @@ int main(int argc, char ** av){ /* fclose(fd); */ } - scalfmm_handle handle = scalfmm_init(user_defined_kernel); + scalfmm_handle handle = scalfmm_init(user_defined_kernel,multi_thread); //For 
Reference - scalfmm_handle handle_ref = scalfmm_init(chebyshev); + scalfmm_handle handle_ref = scalfmm_init(chebyshev,multi_thread); + //Struct for user defined kernel struct User_Scalfmm_Cell_Descriptor cellDescriptor; @@ -180,13 +185,8 @@ int main(int argc, char ** av){ scalfmm_build_tree(handle,treeHeight, boxWidth, boxCenter, cellDescriptor); scalfmm_build_tree(handle_ref,treeHeight, boxWidth, boxCenter, user_descr); - // Insert particles - printf("Inserting particles...\n"); - scalfmm_tree_insert_particles_xyz(handle, nbPart, particleXYZ); - scalfmm_tree_insert_particles_xyz(handle_ref, nbPart, particleXYZ); + //Once is the tree built, one must set the kernel before inserting particles - //Set physical values for Cheb_ref - scalfmm_set_physical_values(handle_ref,nbPart,physicalValues); //Set our callbacks struct User_Scalfmm_Kernel_Descriptor kernel; kernel.p2m = cheb_p2m; @@ -239,83 +239,125 @@ int main(int argc, char ** av){ //Give ScalFMM the datas before calling fmm (this will set as well the kernel) scalfmm_user_kernel_config(handle,kernel,&userDatas); - //Set timers - Timer interface_timer,ref_timer; - //Execute FMM - tic(&interface_timer); - scalfmm_execute_fmm(handle/*, kernel, &my_data*/); - tac(&interface_timer); + // Insert particles + printf("Inserting particles...\n"); + scalfmm_tree_insert_particles_xyz(handle, nbPart, particleXYZ,BOTH); + scalfmm_tree_insert_particles_xyz(handle_ref, nbPart, particleXYZ,BOTH); - //Reduction on forces & potential arrays - { - FSize idxPart; - for(idThreads=1 ; idThreads<nb_threads ; ++idThreads){ - for(idxPart=0 ; idxPart<nbPart ; ++idxPart){ - //Everything is stored in first array - forcesToStore[0][3*idxPart+0] += forcesToStore[idThreads][3*idxPart+0]; - forcesToStore[0][3*idxPart+1] += forcesToStore[idThreads][3*idxPart+1]; - forcesToStore[0][3*idxPart+2] += forcesToStore[idThreads][3*idxPart+2]; - potentialToStore[0][idxPart] += potentialToStore[idThreads][idxPart]; - } - } - } - printf("User defined 
Chebyshev done\n"); - print_elapsed(&interface_timer); + //Set physical values for Cheb_ref + scalfmm_set_physical_values(handle_ref,nbPart,physicalValues,BOTH); + - tic(&ref_timer); - scalfmm_execute_fmm(handle_ref/*, kernel, &my_data*/); - tac(&ref_timer); - printf("Intern Chebyshev done\n"); - print_elapsed(&ref_timer); + //Set timers + Timer interface_timer,ref_timer; + int ite=0, max_ite=5; + while(ite<max_ite){ + //Execute FMM + tic(&interface_timer); + scalfmm_execute_fmm(handle/*, kernel, &my_data*/); + tac(&interface_timer); + + + //Reduction on forces & potential arrays + { + FSize idxPart; + for(idThreads=1 ; idThreads<nb_threads ; ++idThreads){ + for(idxPart=0 ; idxPart<nbPart ; ++idxPart){ + //Everything is stored in first array + + forcesToStore[0][3*idxPart+0] += forcesToStore[idThreads][3*idxPart+0]; + forcesToStore[0][3*idxPart+1] += forcesToStore[idThreads][3*idxPart+1]; + forcesToStore[0][3*idxPart+2] += forcesToStore[idThreads][3*idxPart+2]; + potentialToStore[0][idxPart] += potentialToStore[idThreads][idxPart]; + } + } + } - //Print time results - print_difference_elapsed(&interface_timer,&ref_timer); + printf("User defined Chebyshev done\n"); + print_elapsed(&interface_timer); + + tic(&ref_timer); + scalfmm_execute_fmm(handle_ref/*, kernel, &my_data*/); + tac(&ref_timer); + + printf("Intern Chebyshev done\n"); + print_elapsed(&ref_timer); + + //Print time results + print_difference_elapsed(&interface_timer,&ref_timer); + + //get back the forces & potentials for ref_cheb execution + double * forcesRef = malloc(sizeof(double)*3*nbPart); + double * potentialsRef = malloc(sizeof(double)*nbPart); + + memset(forcesRef,0,sizeof(double)*3*nbPart); + memset(potentialsRef,0,sizeof(double)*nbPart); + + scalfmm_get_forces_xyz(handle_ref,nbPart,forcesRef,BOTH); + scalfmm_get_potentials(handle_ref,nbPart,potentialsRef,BOTH); + //scalfmm_print_everything(handle_ref); + + {//Comparison part + FSize idxPart; + int nbPartOkay = 0; + for(idxPart=0 ; 
idxPart<nbPart ; ++idxPart ){ + double diffX,diffY,diffZ,diffPot; + diffX = fabs( forcesToStore[0][idxPart*3+0]-forcesRef[idxPart*3+0] ); + diffY = fabs( forcesToStore[0][idxPart*3+1]-forcesRef[idxPart*3+1] ); + diffZ = fabs( forcesToStore[0][idxPart*3+2]-forcesRef[idxPart*3+2] ); + diffPot = fabs( potentialToStore[0][idxPart]-potentialsRef[idxPart] ); + + //THERE + + if(diffX < 0.00000001 && diffY < 0.00000001 && diffZ < 0.00000001 && diffPot < 0.00000001){ + nbPartOkay++; + } + else{ + printf("id : %lld : %e, %e, %e, %e, ChebInterf Pot : %e Cheb Pot : %e \n", + idxPart,diffX,diffY,diffZ,diffPot, + potentialToStore[0][idxPart], + potentialsRef[idxPart]); + } + //That part is to verify with our usual exec' if everything is alright + if(idxPart == 0 || idxPart == nbPart/2 || idxPart == nbPart-1){ + printf("User one's id : %lld : %e, %e, %e, %e\n",idxPart, + forcesToStore[0][idxPart*3+0], + forcesToStore[0][idxPart*3+1], + forcesToStore[0][idxPart*3+2], + potentialToStore[0][idxPart]); + printf("Chebyshev one's id : %lld : %e, %e, %e, %e\n",idxPart, + forcesRef[idxPart*3+0], + forcesRef[idxPart*3+1], + forcesRef[idxPart*3+2], + potentialsRef[idxPart]); + } + } + printf("End of simulation -- \t %d\n \t Percentage of good parts : %d/%d (%f %%) \n",ite, + nbPartOkay,nbPart,(((double) nbPartOkay)/(double)nbPart)*100); + } - //get back the forces & potentials for ref_cheb execution - double * forcesRef = malloc(sizeof(double)*3*nbPart); - double * potentialsRef = malloc(sizeof(double)*nbPart); + free(forcesRef); + free(potentialsRef); - memset(forcesRef,0,sizeof(double)*3*nbPart); - memset(potentialsRef,0,sizeof(double)*nbPart); + //Reset + scalfmm_reset_tree(handle,cheb_resetCell); + scalfmm_reset_tree(handle_ref,NULL); - scalfmm_get_forces_xyz(handle_ref,nbPart,forcesRef); - scalfmm_get_potentials(handle_ref,nbPart,potentialsRef); + printf("Internal resets done \n"); - {//Comparison part - FSize idxPart; - int nbPartOkay = 0; - for(idxPart=0 ; idxPart<nbPart ; ++idxPart 
){ - double diffX,diffY,diffZ,diffPot; - diffX = forcesToStore[0][idxPart*3+0]-forcesRef[idxPart*3+0]; - diffY = forcesToStore[0][idxPart*3+1]-forcesRef[idxPart*3+1]; - diffZ = forcesToStore[0][idxPart*3+2]-forcesRef[idxPart*3+2]; - diffPot = potentialToStore[0][idxPart]-potentialsRef[idxPart]; - - //THERE - - if(diffX < 0.00000001 && diffY < 0.00000001 && diffZ < 0.00000001 && diffPot < 0.00000001){ - nbPartOkay++; - } - else{ - printf("id : %lld : %e, %e, %e, %e\n",idxPart,diffX,diffY,diffZ,diffPot); - } - //That part is to verify with our usual exec' if everything is alright - if(idxPart == 0 || idxPart == nbPart/2 || idxPart == nbPart-1){ - printf("User one's id : %lld : %e, %e, %e\n",idxPart, - forcesToStore[0][idxPart*3+0], - forcesToStore[0][idxPart*3+1], - forcesToStore[0][idxPart*3+2]); - printf("Chebyshev one's id : %lld : %e, %e, %e\n",idxPart, - forcesRef[idxPart*3+0], - forcesRef[idxPart*3+1], - forcesRef[idxPart*3+2]); + {//Reset User's datas + FSize idThreads; + for(idThreads=0;idThreads<nb_threads;++idThreads){ + memset(potentialToStore[idThreads],0,sizeof(double)*nbPart); + memset(forcesToStore[idThreads],0,sizeof(double)*nbPart*3); } } - printf("End of simulation \n \t Percentage of good parts : %d/%d (%f %%) \n", - nbPartOkay,nbPart,(((double) nbPartOkay)/(double)nbPart)*100); + printf("External resets done ...\n"); + + ite++; } printf("Free the kernels\n"); @@ -325,7 +367,6 @@ int main(int argc, char ** av){ free(particleXYZ); free(physicalValues); - free(forcesRef); //free the thread' specific datas for(idThreads=0 ; idThreads<nb_threads ; ++idThreads){ free(forcesToStore[idThreads]); diff --git a/Addons/CKernelApi/Tests/testSphereElectro.c b/Addons/CKernelApi/Tests/testSphereElectro.c new file mode 100644 index 0000000000000000000000000000000000000000..973c4ecc69ac6f2d2717720c0d0aa8c5e66c218e --- /dev/null +++ b/Addons/CKernelApi/Tests/testSphereElectro.c @@ -0,0 +1,273 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include 
<math.h> + + + + +//For timing monitoring +#include <time.h> +#include <sys/time.h> + +#include "../Src/CScalfmmApi.h" + +#include "../../Src/Kernels/Chebyshev/FChebInterface.h" + +double getRandom(){ + return (random()/(double)(RAND_MAX)); +} + +void generateSurfacePointOnUnitSphere(int N, double * points){ + double u, v, theta, phi, sinPhi ; + // + int j = 0,i=0 ; + for ( i = 0 ; i< N ; ++i, j+=3) { + // + u = getRandom() ; v = getRandom() ; + theta = 2*M_PI*u ; + phi = acos(2*v-1); + sinPhi = sin(phi); + // + points[j] = cos(theta)*sinPhi ; + points[j+1] = sin(theta)*sinPhi ; + points[j+2] = 2*v-1 ; + // + } +} + +void generateSurfacePoints(double rayon, double* centre, int nbDePoints, double* points){ + + generateSurfacePointOnUnitSphere(nbDePoints , points) ; + int j =0,i=0 ; + for ( i = 0 ; i< nbDePoints ; ++i, j+=3) { + points[j] *= rayon + centre[0]; + points[j+1] *= rayon + centre[1]; + points[j+2] *= rayon + centre[2]; + } +} + +void generateInsidePoints(double rayon, double*centre, int nbDePoints, double* points){ + generateSurfacePointOnUnitSphere(nbDePoints, points); + int j=0; + double u; + for(j=0 ; j<nbDePoints ; ++j){ + u = getRandom(); + points[j] *= (rayon + centre[0])*u; + points[j+1] *= (rayon + centre[1])*u; + points[j+2] *= (rayon + centre[2])*u; + } +} + +void displayPoints(int nbPoints, double * points){ + int i = 0; + for(i=0 ; i<nbPoints ; ++i){ + printf("%e %e %e \n",points[i*3],points[i*3+1],points[i*3+2]); + } +} + +void displayArray(int nbValue, double * array){ + int i = 0; + for(i=0 ; i<nbValue ; ++i){ + printf("%e \n",array[i]); + } +} + +void getNormal(double * positions, double * normeToFill){ + int i; + double norme = sqrt(positions[0]*positions[0] + positions[1]*positions[1] + positions[2]*positions[2]); + for(i=0 ; i<3 ; ++i){ + normeToFill[i] = positions[i]/norme; + } + printf("Tgt Norme %e - %e - %e\n", + normeToFill[0], + normeToFill[1], + normeToFill[2]); +} + +void computeNormalXForces(int nbPoints, double * forcesToRead, 
double * positionsToRead, double * arrayToFill){ + double * currentNormal = malloc(sizeof(double)*3); + int idxPart,i; + for(idxPart = 0 ; idxPart<nbPoints ; ++idxPart){ + getNormal(&positionsToRead[idxPart],currentNormal); //get the norme + for(i=0 ; i<3 ; ++i){ + arrayToFill[idxPart] += currentNormal[i]*forcesToRead[idxPart+i]; + } + } + free(currentNormal); +} + +int main(int argc, char ** av){ + printf("Start\n"); + if(argc<2){ + printf("Use : %s nb_part(cible) (optionnal : TreeHeight) \nexiting\n",av[0]); + exit(0); + } + int nbPartTarget= atoi(av[1]); + int treeHeight = 5 ; + if(argc>2){ + int treeHeight = atoi(av[2]); + } + + double boxWidth = 2.0; + double boxCenter[3]; + boxCenter[0] = boxCenter[1] = boxCenter[2] = 0.0; + + int i; + //Allocation of the target points + double * targetsXYZ = malloc(sizeof(double)* 3*nbPartTarget); + double * targetsPhiValues = malloc(sizeof(double)* nbPartTarget); + //Memset (au cas ou) + memset(targetsXYZ,0,sizeof(double)*3*nbPartTarget); + memset(targetsPhiValues,0,sizeof(double)*nbPartTarget); + //Fill + for(i=0 ; i<nbPartTarget ; ++i){ + targetsPhiValues[i] = -1.0; + } + generateSurfacePoints(1.0,boxCenter,nbPartTarget,targetsXYZ); + printf("Surface points generated \n"); + + //Allocation of the sources points + int nbPartSource = 10; + double * sourceXYZ = malloc(sizeof(double)* 3*nbPartSource); + double * sourcePhiValues = malloc(sizeof(double)* nbPartSource); + //Set to Zero + memset(sourceXYZ,0,3*sizeof(double)*nbPartSource); + memset(sourcePhiValues,0,sizeof(double)*nbPartSource); + //Fill + for(i=0 ; i<nbPartSource ; ++i){ + sourcePhiValues[i] = 1.0; + } + generateInsidePoints(1.0,boxCenter,nbPartSource,sourceXYZ); + //displayPoints(nbPartTarget,targetsXYZ); + + printf("Inside points generated \n"); + //displayPoints(nbPartSource,sourceXYZ); + //Creation of arrays to store forces + double * arrayOfForces = malloc(sizeof(double )* 3 * (nbPartSource+nbPartTarget)); + memset(arrayOfForces,0,sizeof(double)* 3 * 
(nbPartTarget)); + + {//Start of computation + + //For handling the library + scalfmm_handle handle = scalfmm_init(chebyshev,source_target); + + //Struct for ref cheb kernel + struct User_Scalfmm_Cell_Descriptor user_descr; + user_descr.user_init_cell = NULL; + user_descr.user_free_cell = NULL; + //Set algorithm to source target + //scalfmm_algorithm_config(handle,source_target); + //Build the tree + scalfmm_build_tree(handle,treeHeight, boxWidth, boxCenter, user_descr); + + //Insert Sources and targets + scalfmm_tree_insert_particles_xyz(handle,nbPartSource,sourceXYZ,SOURCE); + printf("Sources inserted \n"); + scalfmm_tree_insert_particles_xyz(handle,nbPartTarget,targetsXYZ,TARGET); + printf("Targets inserted \n"); + //Since we inserted first the sources, then sources will get + //indices from 0 to (nbPartSource-1), and targets from + //(nbPartSource) to nbPartSource+nbPartTarget-1). + + int * arrayofIndicesSource = malloc(sizeof(int)*nbPartSource); + int * arrayofIndicesTarget = malloc(sizeof(int)*nbPartTarget); + {//Set physical values + + //SRC + int idPart; + for(idPart = 0 ; idPart<nbPartSource ; ++idPart){ + arrayofIndicesSource[idPart] = idPart; + } + scalfmm_set_physical_values_npart(handle,nbPartSource,arrayofIndicesSource,sourcePhiValues,SOURCE); + //TGT + for(idPart = 0 ; idPart<nbPartTarget ; ++idPart){ + arrayofIndicesTarget[idPart] = idPart; // here, we add the number of sources previously inserted + } + scalfmm_set_physical_values_npart(handle,nbPartTarget,arrayofIndicesTarget,targetsPhiValues,TARGET); + + + + } + //Computation + scalfmm_execute_fmm(handle/*, kernel, &my_data*/); + + //Get back the forces + scalfmm_get_forces_xyz(handle,nbPartTarget,arrayOfForces,TARGET); + scalfmm_get_forces_xyz(handle,nbPartSource,&arrayOfForces[nbPartTarget],SOURCE); + printf("Forces computed : \n"); + displayPoints(nbPartTarget+nbPartSource,arrayOfForces); + printf("As expected, Source forces are 0\n \n"); + //Release memory used : + free(arrayofIndicesSource); 
+ free(arrayofIndicesTarget); + + scalfmm_dealloc_handle(handle,NULL); + + } + + {//Let's check the result, we computed fr each target part its forces + //Storage of reference forces + double * arrayRefForces = malloc(sizeof(double)*nbPartTarget*3); + memset(arrayRefForces,0,sizeof(double)*nbPartTarget*3); + + int idTgt; + for(idTgt = 0 ; idTgt<nbPartTarget ; ++idTgt){ + int idSrc; + double dx,dy,dz; + for(idSrc = 0 ; idSrc<nbPartTarget ; ++idSrc){ + //First compute dist. + dx = sourceXYZ[idSrc+0] - targetsXYZ[idTgt+0]; + dy = sourceXYZ[idSrc+1] - targetsXYZ[idTgt+1]; + dz = sourceXYZ[idSrc+2] - targetsXYZ[idTgt+2]; + + //Secondly, compute coeff + double coeffs = targetsPhiValues[idTgt] * sourcePhiValues[idSrc]; + double one_over_r = 1.0/(sqrt(dx*dx+dy*dy+dz*dz)); + double one_over_r3 = one_over_r * one_over_r * one_over_r; + + arrayRefForces[idTgt*3+0] += dx*coeffs*one_over_r3; + arrayRefForces[idTgt*3+1] += dy*coeffs*one_over_r3; + arrayRefForces[idTgt*3+2] += dz*coeffs*one_over_r3; + + } + } + + {//Then, we compare + double errorCumul = 0; + int idArr; + for(idArr = 0 ; idArr<nbPartTarget ; ++idArr){ + errorCumul += fabs(arrayRefForces[idArr+0]-arrayOfForces[idArr+0]); + errorCumul += fabs(arrayRefForces[idArr+1]-arrayOfForces[idArr+1]); + errorCumul += fabs(arrayRefForces[idArr+2]-arrayOfForces[idArr+2]); + printf("Directly Computed %e %e %e\n", + arrayRefForces[idArr+0], + arrayRefForces[idArr+1], + arrayRefForces[idArr+2]); + } + printf("Error cumul : %e\n",errorCumul); + } + } + + + //Part where we apply normal on target's forces vector + //Copying each target's parts forces, + double * targetsForces = malloc(sizeof(double) * 3 * nbPartTarget); + memcpy(targetsForces,arrayOfForces,sizeof(double)*3*nbPartTarget); + + double * normeXForces = malloc(sizeof(double) * nbPartTarget); + memset(normeXForces,0,sizeof(double) * nbPartTarget); + + computeNormalXForces(nbPartTarget,targetsForces,targetsXYZ,normeXForces); + printf("For each target, we display [Normal To 
Sphere] . [Force product] \n"); + displayArray(nbPartTarget,normeXForces); + + + //Free memory + free(sourceXYZ); + free(sourcePhiValues); + free(targetsXYZ); + free(targetsPhiValues); + free(arrayOfForces); + return EXIT_SUCCESS; +} diff --git a/CMakeLists.txt b/CMakeLists.txt index f4295c9878140f07fc8de637bd1849afbcdfc9ce..de79d98b0eac2a585a0cd2e4ef5e2362885e12e8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -70,7 +70,8 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/") option( SCALFMM_USE_MIC_NATIVE "Set to ON to compile in native mode for MIC" OFF ) option( SCALFMM_ONLY_DEVEL "Set to ON to compile Development tools (only scalfmm team)" ON ) if( SCALFMM_ONLY_DEVEL ) - option( SCALFMM_USE_STARPU "Set to ON to build SCALFMM with StarPU" OFF ) + option( SCALFMM_USE_STARPU "Set to ON to build SCALFMM with StarPU" OFF ) + option( SCALFMM_BUILD_UTILS "Set to ON to build utils Tests" OFF ) endif() if( SCALFMM_USE_MPI ) try_compile(COMPILE_INTEL ${CMAKE_CURRENT_BINARY_DIR} @@ -653,7 +654,7 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/") # Build tools (Internal use) # ################################################################## # Build - UTILs - if( SCALFMM_ONLY_DEVEL ) + if( SCALFMM_ONLY_DEVEL AND SCALFMM_BUILD_UTILS) add_subdirectory(Utils) endif() diff --git a/CMakeModules/morse/find/FindBLAS.cmake b/CMakeModules/morse/find/FindBLAS.cmake index 9bcbd569c910ff0a06c645b4ba91a896431cd5a3..b1242f0dae40ff27267435fd6b938bb4742f8d60 100644 --- a/CMakeModules/morse/find/FindBLAS.cmake +++ b/CMakeModules/morse/find/FindBLAS.cmake @@ -47,6 +47,19 @@ ## Intel10_64lp_seq (intel mkl v10 64 bit,sequential code, lp64 model), ## Intel( older versions of mkl 32 and 64 bit), ACML,ACML_MP,ACML_GPU,Apple, NAS, Generic # C/CXX should be enabled to use Intel mkl +### +# We handle different modes to find the dependency +# +# - Detection if already installed on the system +# - BLAS libraries can be detected from different 
ways +# Here is the order of precedence: +# 1) we look in cmake variable BLAS_LIBDIR or BLAS_DIR (we guess the libdirs) if defined +# 2) we look in environnement variable BLAS_LIBDIR or BLAS_DIR (we guess the libdirs) if defined +# 3) we look in common environnment variables depending on the system (INCLUDE, C_INCLUDE_PATH, CPATH - LIB, DYLD_LIBRARY_PATH, LD_LIBRARY_PATH) +# 4) we look in common system paths depending on the system, see for example paths contained in the following cmake variables: +# - CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES, CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES +# - CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES, CMAKE_C_IMPLICIT_LINK_DIRECTORIES +# #============================================================================= # Copyright 2007-2009 Kitware, Inc. @@ -288,6 +301,10 @@ macro(Check_Fortran_Libraries LIBRARIES _prefix _name _flags _list _thread) unset(${_prefix}${_combined_name}_WORKS CACHE) endif() if (_CHECK_FORTRAN) + if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") + string(REPLACE "mkl_intel_lp64" "mkl_gf_lp64" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") + string(REPLACE "mkl_intel_ilp64" "mkl_gf_ilp64" CMAKE_REQUIRED_LIBRARIES "${CMAKE_REQUIRED_LIBRARIES}") + endif() check_fortran_function_exists("${_name}" ${_prefix}${_combined_name}_WORKS) else() check_function_exists("${_name}_" ${_prefix}${_combined_name}_WORKS) @@ -402,8 +419,6 @@ if (BLA_VENDOR MATCHES "Intel*" OR BLA_VENDOR STREQUAL "All") endif() endif() - # libiomp5 - # -------- if (WIN32) string(REPLACE ":" ";" _libdir "$ENV{LIB}") elseif (APPLE) @@ -413,6 +428,8 @@ if (BLA_VENDOR MATCHES "Intel*" OR BLA_VENDOR STREQUAL "All") endif () list(APPEND _libdir "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}") list(APPEND _libdir "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") + # libiomp5 + # -------- set(OMP_iomp5_LIBRARY "OMP_iomp5_LIBRARY-NOTFOUND") find_library(OMP_iomp5_LIBRARY NAMES iomp5 @@ -420,15 +437,35 @@ if (BLA_VENDOR MATCHES "Intel*" OR BLA_VENDOR STREQUAL "All") ) 
mark_as_advanced(OMP_iomp5_LIBRARY) set(OMP_LIB "") - if (OMP_iomp5_LIBRARY) - set(OMP_LIB "${OMP_iomp5_LIBRARY}") + # libgomp + # ------- + set(OMP_gomp_LIBRARY "OMP_gomp_LIBRARY-NOTFOUND") + find_library(OMP_gomp_LIBRARY + NAMES gomp + HINTS ${_libdir} + ) + mark_as_advanced(OMP_gomp_LIBRARY) + # choose one or another depending on the compilo + if (CMAKE_C_COMPILER_ID STREQUAL "GNU") + if (OMP_gomp_LIBRARY) + set(OMP_LIB "${OMP_gomp_LIBRARY}") + endif() + else(CMAKE_C_COMPILER_ID STREQUAL "Intel") + if (OMP_iomp5_LIBRARY) + set(OMP_LIB "${OMP_iomp5_LIBRARY}") + endif() endif() if (UNIX AND NOT WIN32) set(LM "-lm") set(BLAS_COMPILER_FLAGS "") - if (CMAKE_C_COMPILER_ID STREQUAL "Intel" AND NOT BLA_VENDOR STREQUAL "Intel10_64lp_seq") - list(APPEND BLAS_COMPILER_FLAGS "-openmp") + if (NOT BLA_VENDOR STREQUAL "Intel10_64lp_seq") + if (CMAKE_C_COMPILER_ID STREQUAL "Intel") + list(APPEND BLAS_COMPILER_FLAGS "-openmp") + endif() + if (CMAKE_C_COMPILER_ID STREQUAL "GNU") + list(APPEND BLAS_COMPILER_FLAGS "-fopenmp") + endif() endif() if (CMAKE_C_COMPILER_ID STREQUAL "GNU") if (BLA_VENDOR STREQUAL "Intel10_32") @@ -510,8 +547,14 @@ if (BLA_VENDOR MATCHES "Intel*" OR BLA_VENDOR STREQUAL "All") list(APPEND BLAS_SEARCH_LIBS "mkl_blas95 mkl_intel_lp64 mkl_intel_thread mkl_core guide") # mkl >= 10.3 - list(APPEND BLAS_SEARCH_LIBS - "mkl_blas95_lp64 mkl_intel_lp64 mkl_intel_thread mkl_core") + if (CMAKE_C_COMPILER_ID STREQUAL "Intel") + list(APPEND BLAS_SEARCH_LIBS + "mkl_blas95_lp64 mkl_intel_lp64 mkl_intel_thread mkl_core") + endif() + if (CMAKE_C_COMPILER_ID STREQUAL "GNU") + list(APPEND BLAS_SEARCH_LIBS + "mkl_blas95_lp64 mkl_intel_lp64 mkl_gnu_thread mkl_core") + endif() endif () if (BLA_VENDOR STREQUAL "Intel10_64lp_seq" OR BLA_VENDOR STREQUAL "All") list(APPEND BLAS_SEARCH_LIBS @@ -576,8 +619,14 @@ if (BLA_VENDOR MATCHES "Intel*" OR BLA_VENDOR STREQUAL "All") list(APPEND BLAS_SEARCH_LIBS "mkl_intel_lp64 mkl_intel_thread mkl_core guide") # mkl >= 10.3 - list(APPEND 
BLAS_SEARCH_LIBS - "mkl_intel_lp64 mkl_intel_thread mkl_core") + if (CMAKE_C_COMPILER_ID STREQUAL "Intel") + list(APPEND BLAS_SEARCH_LIBS + "mkl_intel_lp64 mkl_intel_thread mkl_core") + endif() + if (CMAKE_C_COMPILER_ID STREQUAL "GNU") + list(APPEND BLAS_SEARCH_LIBS + "mkl_intel_lp64 mkl_gnu_thread mkl_core") + endif() endif () if (BLA_VENDOR STREQUAL "Intel10_64lp_seq" OR BLA_VENDOR STREQUAL "All") list(APPEND BLAS_SEARCH_LIBS diff --git a/CMakeModules/morse/find/FindCBLAS.cmake b/CMakeModules/morse/find/FindCBLAS.cmake index 6dd08da341e18d479f836933a66569e47800f4f6..eefd7b59ded0416e74d8ada8b5a1ea4a3df5feb6 100644 --- a/CMakeModules/morse/find/FindCBLAS.cmake +++ b/CMakeModules/morse/find/FindCBLAS.cmake @@ -48,6 +48,19 @@ # look for a stand alone cblas, please add the following in your # CMakeLists.txt before to call find_package(CBLAS): # set(CBLAS_STANDALONE TRUE) +### +# We handle different modes to find the dependency +# +# - Detection if already installed on the system +# - CBLAS libraries can be detected from different ways +# Here is the order of precedence: +# 1) we look in cmake variable CBLAS_LIBDIR or CBLAS_DIR (we guess the libdirs) if defined +# 2) we look in environnement variable CBLAS_LIBDIR or CBLAS_DIR (we guess the libdirs) if defined +# 3) we look in common environnment variables depending on the system (INCLUDE, C_INCLUDE_PATH, CPATH - LIB, DYLD_LIBRARY_PATH, LD_LIBRARY_PATH) +# 4) we look in common system paths depending on the system, see for example paths contained in the following cmake variables: +# - CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES, CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES +# - CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES, CMAKE_C_IMPLICIT_LINK_DIRECTORIES +# #============================================================================= # Copyright 2012-2013 Inria @@ -80,11 +93,7 @@ endif() if (CBLAS_FIND_COMPONENTS) foreach( component ${CBLAS_FIND_COMPONENTS} ) if(CBLAS_FIND_REQUIRED_${component}) - if (CBLAS_FIND_REQUIRED) - 
find_package(${component} REQUIRED) - else() - find_package(${component}) - endif() + find_package(${component} REQUIRED) else() find_package(${component}) endif() diff --git a/CMakeModules/morse/find/FindMETIS.cmake b/CMakeModules/morse/find/FindMETIS.cmake index 43a16e7b034b3537785dabf69a2d62401997557e..9d026b7852d9ce2f3a19e64743e2321be9fd37af 100644 --- a/CMakeModules/morse/find/FindMETIS.cmake +++ b/CMakeModules/morse/find/FindMETIS.cmake @@ -205,6 +205,10 @@ if(METIS_LIBRARIES) set(REQUIRED_LIBDIRS "${METIS_LIBRARY_DIRS}") endif() set(REQUIRED_LIBS "${METIS_LIBRARIES}") + # m + if(UNIX OR WIN32) + list(APPEND REQUIRED_LIBS "-lm") + endif() # set required libraries for link set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}") diff --git a/CMakeModules/morse/find/FindPARMETIS.cmake b/CMakeModules/morse/find/FindPARMETIS.cmake index 227c178e432373a095d56e3ad1cfd7131afb5f69..36235d868fb98212d0438af3ebf77db8f382ed13 100644 --- a/CMakeModules/morse/find/FindPARMETIS.cmake +++ b/CMakeModules/morse/find/FindPARMETIS.cmake @@ -205,6 +205,10 @@ if(PARMETIS_LIBRARIES) set(REQUIRED_LIBDIRS "${PARMETIS_LIBRARY_DIRS}") endif() set(REQUIRED_LIBS "${PARMETIS_LIBRARIES}") + # m + if(UNIX OR WIN32) + list(APPEND REQUIRED_LIBS "-lm") + endif() # set required libraries for link set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}") diff --git a/CMakeModules/morse/find/FindSTARPU.cmake b/CMakeModules/morse/find/FindSTARPU.cmake index b1edf5ddc982ab7d5305c5ef9578cdc8586d32cc..11f39abc0e0048e2b563faef4e4a39aa9d139cd2 100644 --- a/CMakeModules/morse/find/FindSTARPU.cmake +++ b/CMakeModules/morse/find/FindSTARPU.cmake @@ -310,7 +310,12 @@ if( (NOT PKG_CONFIG_EXECUTABLE) OR (PKG_CONFIG_EXECUTABLE AND NOT STARPU_FOUND) set(STARPU_${starpu_hdr}_INCLUDE_DIRS "STARPU_${starpu_hdr}_INCLUDE_DIRS-NOTFOUND") find_path(STARPU_${starpu_hdr}_INCLUDE_DIRS NAMES ${starpu_hdr} - HINTS ${_inc_env}) + HINTS ${_inc_env} + PATH_SUFFIXES + "starpu/1.0" + "starpu/1.1" + "starpu/1.2" + "starpu/1.3") endforeach() 
endif() endif() diff --git a/Examples/generateDistributions.cpp b/Examples/generateDistributions.cpp index 7a221f49cd9982c9f34c8b6844ea5805bae86001..e0972205977134a9406e05b0ad4f9ca148c2da51 100644 --- a/Examples/generateDistributions.cpp +++ b/Examples/generateDistributions.cpp @@ -111,12 +111,14 @@ int main(int argc, char ** argv){ FParameterDefinitions::NbParticles,FParameterDefinitions::OutputVisuFile,LocalOptionEllipsoid); - + typedef double FReal; - FReal extraRadius = 0.000 ; + FReal extraRadius = 0.000 ; + const FSize NbPoints = FParameters::getValue(argc,argv,FParameterDefinitions::NbParticles.options, FSize(20000)); - const std::string genericFileName(FParameters::getStr(argc,argv,FParameterDefinitions::InputFile.options, "unifPointDist")); - FReal BoxWith = 0.0; + const std::string genericFileName(FParameters::getStr(argc,argv,FParameterDefinitions::OutputFile.options, "unifPointDist")); + + FReal BoxWith = 0.0; FPoint<FReal> Centre(0.0, 0.0,0.0); // // Allocation diff --git a/Src/BalanceTree/FChebSymCostKernel.hpp b/Src/BalanceTree/FChebSymCostKernel.hpp new file mode 100644 index 0000000000000000000000000000000000000000..58dab287cc56323f1a8734556fea09fe3e20759a --- /dev/null +++ b/Src/BalanceTree/FChebSymCostKernel.hpp @@ -0,0 +1,481 @@ +// =================================================================================== +// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Berenger Bramas, Matthias Messner +// olivier.coulaud@inria.fr, berenger.bramas@inria.fr +// This software is a computer program whose purpose is to compute the FMM. +// +// This software is governed by the CeCILL-C and LGPL licenses and +// abiding by the rules of distribution of free software. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public and CeCILL-C Licenses for more details. 
+// "http://www.cecill.info". +// "http://www.gnu.org/licenses". +// =================================================================================== + +// ==== CMAKE ==== +// Keep in private GIT +// @SCALFMM_PRIVATE + + +#ifndef FCHEBFLOPSSYMKERNEL_HPP +#define FCHEBFLOPSSYMKERNEL_HPP + +#include <stdexcept> + +#include "Utils/FGlobal.hpp" + +#include "Utils/FSmartPointer.hpp" + +#include "Components/FAbstractKernels.hpp" + +#include "Kernels/Chebyshev/FChebInterpolator.hpp" +#include "Kernels/Chebyshev/FChebCell.hpp" +#include "Kernels/Interpolation/FInterpSymmetries.hpp" + +class FTreeCoordinate; + +/** + * \author Quentin Khan, Matthias Messner (original file: FChebFlopsSymKernel) + * \brief Cost computation of Chebyshev interpolation based FMM. + * + * Please read the license + * + * This kernel implements the cost computation of the Chebyshev interpolation + * based FMM operators exploiting the symmetries in the far-field. It implements + * all interfaces (P2P, P2M, M2M, M2L, L2L, L2P) which are required by the + * FFmmAlgorithm and FFmmAlgorithmThread. + * + * \tparam Freal Type of real number representation + * \tparam CellClass Type of cell + * \tparam ContainerClass Type of container to store particles + * \tparam MatrixKernelClass Type of matrix kernel function + * \tparam ORDER Chebyshev interpolation order + * \tparam OctreeClass Class of the octree to work on. 
+ */ +template < typename FReal, class CellClass, class ContainerClass, class MatrixKernelClass, int ORDER, class OctreeClass> +class FChebSymCostKernel : public FAbstractKernels<CellClass, ContainerClass> +{ + enum {nnodes = TensorTraits<ORDER>::nnodes}; +public: + // Class types available to the rest of the world + /// Type of real number representation + using _FReal = FReal; + /// Type of cell + using _CellClass = CellClass; + /// Type of container to store particles + using _ContainerClass = ContainerClass; + /// Type of matrix kernel function + using _MatrixKernelClass = MatrixKernelClass; + /// Chebyshev interpolation order + constexpr static const int order = ORDER; + /// Class of the octree to work on + using _OctreeClass = OctreeClass; + + + /// Handler to deal with all symmetries: Stores permutation indices and + /// vectors to reduce 343 different interactions to 16 only. + struct SymmetryHandler; + + /// Needed for handling all symmetries + const FSmartPointer<MatrixKernelClass,FSmartPointerMemory> MatrixKernel; + const FSmartPointer<SymmetryHandler, FSmartPointerMemory> SymHandler; + + /// The tree the kernel is working on. Needed to attain cells in the P2P + /// operator (we only get the particles containers otherwise) + const OctreeClass* const _tree; + + /// The tree height + const unsigned int _treeHeight; + + /// count permuted local and multipole expansions + unsigned int* countExp; + + /// Flops count for each operator of the FMM. + unsigned long long flopsP2M = 0, + flopsM2M = 0, + flopsM2L = 0, + flopsL2L = 0, + flopsL2P = 0, + flopsP2P = 0; + + /// Operators count. 
+ unsigned long long countP2M = 0, + countM2M = 0, + countM2L = 0, + countL2L = 0, + countL2P = 0, + countP2P = 0; + + + + /// start flop counters + FSize countFlopsM2MorL2L() const + { return 3 * nnodes * (2*ORDER-1); } + + FSize countFlopsM2L(const unsigned int nexp, const unsigned int rank) const + { return nexp * (4*nnodes*rank - rank - nnodes); } + + FSize countFlopsP2P() const + { return 34; } + + FSize countFlopsP2Pmutual() const + { return 39; } + + FSize countFlopsP2M(const FSize N) const { + const FSize first = N * (18 + (ORDER-2) * 6 + (ORDER-1) * (6 + (ORDER-1) * (6 + (ORDER-1) * 2))); + const FSize W2 = 3 * ORDER*(2*(ORDER-1)-1); + const FSize W4 = 3 * (ORDER*(ORDER-1)*(2*(ORDER-1)-1) + ORDER*ORDER*(2*(ORDER-1)-1)); + const FSize W8 = 3 * (2*(ORDER-1)-1) * (ORDER*(ORDER-1)*(ORDER-1) + ORDER*ORDER*(ORDER-1) + nnodes); + return first + W2 + W4 + W8 + nnodes*11; + } + + FSize countFlopsL2PTotal(const FSize N) const { + const unsigned W0 = nnodes; + const unsigned W2 = 3 * (ORDER-1)*ORDER*ORDER * 2*ORDER; + const unsigned W4 = 3 * ORDER*(ORDER-1)*(ORDER-1) * 2*ORDER; + const unsigned W8 = (ORDER-1)*(ORDER-1)*(ORDER-1) * (2*ORDER-1); + const FSize second = N * (38 + (ORDER-2)*15 + (ORDER-1)*((ORDER-1) * (27 + (ORDER-1) * 16))) + 6; + return W0 + W2 + W4 + W8 + second; + } + // end flop counters + +public: + /** + * The constructor initializes all constant attributes and it reads the + * precomputed and compressed M2L operators from a binary file (an + * runtime_error is thrown if the required file is not valid). 
+ */ + FChebSymCostKernel(OctreeClass* tree, + const FReal Epsilon) + : MatrixKernel(new MatrixKernelClass()), + SymHandler(new SymmetryHandler(MatrixKernel.getPtr(), Epsilon)), + _tree(tree), + _treeHeight(_tree->getHeight()) + { + countExp = new unsigned int [343]; + } + + + + /** Copy constructor */ + FChebSymCostKernel(const FChebSymCostKernel& other) : + SymHandler(other.SymHandler), + _tree(other._tree), + _treeHeight(other._treeHeight) + { + countExp = new unsigned int [343]; + } + + /** Destructor */ + ~FChebSymCostKernel() { + delete [] countExp; + + } + + void printResults(std::ostream& os) const { + os << "\n==================================================" + << "\n- P2M Flops:" << flopsP2M + << "\n- M2M Flops:" << flopsM2M + << "\n- M2L Flops:" << flopsM2L + << "\n- L2L Flops:" << flopsL2L + << "\n- L2P Flops:" << flopsL2P + << "\n- P2P Flops:" << flopsP2P + << "\n- Overall Flops = " << flopsP2M + flopsM2M + flopsM2L + + flopsL2L + flopsL2P + flopsP2P + << "\n==================================================\n" + << std::endl; + + os << "P2P count: " << countP2P << std::endl; + os << "P2M count: " << countP2M << std::endl; + os << "M2M count: " << countM2M << std::endl; + os << "M2L count: " << countM2L << std::endl; + os << "L2L count: " << countL2L << std::endl; + os << "L2P count: " << countL2P << std::endl; + + } + + + void P2M(CellClass* const cell, const ContainerClass* const SourceParticles) { + FSize tmpCost = countFlopsP2M(SourceParticles->getNbParticles()); + flopsP2M += tmpCost; + cell->addCost(tmpCost); + countP2M++; + } + + + + void M2M(CellClass* const FRestrict cell, + const CellClass*const FRestrict *const FRestrict ChildCells, + const int /*TreeLevel*/) { + FSize flops = 0; + for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex) + if (ChildCells[ChildIndex]) flops += countFlopsM2MorL2L(); + flopsM2M += flops; + cell->addCost(flops); + countM2M++; + } + + + + + void M2L(CellClass* const FRestrict cell, + const CellClass* 
SourceCells[343], + const int /* not needed */, + const int /* TreeLevel */) + { + FSize flops = 0; + // count how ofter each of the 16 interactions is used + memset(countExp, 0, sizeof(int) * 343); + for (unsigned int idx=0; idx<343; ++idx) + if (SourceCells[idx]) countExp[SymHandler->pindices[idx]]++; + // multiply (mat-mat-mul) + for (unsigned int pidx=0; pidx<343; ++pidx) + if (countExp[pidx]) + flops += countFlopsM2L(countExp[pidx], SymHandler->LowRank[pidx]) + + countExp[pidx]*nnodes; + flopsM2L += flops; + cell->addCost(flops); + countM2L++; + } + + + void L2L(const CellClass* const FRestrict /* not needed */, + CellClass* FRestrict *const FRestrict ChildCells, + const int /* TreeLevel*/) { + FSize flops = 0; + FSize tmpCost = 0; + for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex) + if (ChildCells[ChildIndex]) { + tmpCost = countFlopsM2MorL2L() + nnodes; + flops += tmpCost; + ChildCells[ChildIndex]->addCost(flops); + } + flopsL2L += flops; + + countL2L++; + } + + + + void L2P(const CellClass* const cell, + ContainerClass* const TargetParticles) { + //// 1.a) apply Sx + //flopsL2P += countFlopsP2MorL2P(TargetParticlesParticles->getNbParticles()) + TargetParticles->getNbParticles(); + //// 1.b) apply Px (grad Sx) + //flopsL2P += countFlopsL2PGradient(TargetParticlesParticles->getNbParticles()) + 3 * TargetParticles->getNbParticles(); + + // or + + // 2) apply Sx and Px (grad Sx) + FSize tmpCost = 0; + tmpCost = countFlopsL2PTotal(TargetParticles->getNbParticles()) + 4 * TargetParticles->getNbParticles(); + flopsL2P += tmpCost; + cell->addCost(tmpCost); + countL2P++; + } + + + + void P2P(const FTreeCoordinate& LeafCellCoordinate, // needed for periodic boundary conditions + ContainerClass* const FRestrict TargetParticles, + const ContainerClass* const FRestrict SourceParticles, + ContainerClass* const NeighborSourceParticles[27], + const int /* size */) { + FSize tmpCost = 0; + FSize srcPartCount = SourceParticles->getNbParticles(); + FSize 
tgtPartCount = TargetParticles->getNbParticles(); + + if ( TargetParticles != SourceParticles ) { + tmpCost += countFlopsP2P() * tgtPartCount * srcPartCount; + + for ( unsigned int idx = 0; idx < 27; ++idx ) { + if (NeighborSourceParticles[idx]) { + tmpCost += + countFlopsP2P() + * tgtPartCount + * NeighborSourceParticles[idx]->getNbParticles(); + } + } + } else { + tmpCost += + countFlopsP2Pmutual() + * ((tgtPartCount * tgtPartCount + + tgtPartCount) + / 2); + for (unsigned int idx=0; idx<=13; ++idx) + { + if (NeighborSourceParticles[idx]) { + tmpCost += + countFlopsP2Pmutual() + * tgtPartCount + * NeighborSourceParticles[idx]->getNbParticles(); + } + } + } + + flopsP2P += tmpCost; + + CellClass* cell = _tree->getCell( + LeafCellCoordinate.getMortonIndex(_treeHeight - 1), + _treeHeight - 1); + + cell->addNearCost(tmpCost); + countP2P++; + } +}; + + + +/** + * Handler to deal with all symmetries: Stores permutation indices and vectors + * to reduce 343 different interactions to 16 only. 
+ */ +template < typename FReal, class CellClass, class ContainerClass, + class MatrixKernelClass, int ORDER, class OctreeClass> +struct FChebSymCostKernel<FReal, CellClass, ContainerClass, MatrixKernelClass, ORDER, OctreeClass> +::SymmetryHandler +{ + // M2L operators + FReal* K[343]; + int LowRank[343]; + + // permutation vectors and permutated indices + unsigned int pvectors[343][nnodes]; + unsigned int pindices[343]; + + + // compute rank + unsigned int getRank(const FReal singular_values[], const double eps) + { + FReal nrm2(0.); + for (unsigned int k=0; k<nnodes; ++k) + nrm2 += singular_values[k] * singular_values[k]; + + FReal nrm2k(0.); + for (unsigned int k=nnodes; k>0; --k) { + nrm2k += singular_values[k-1] * singular_values[k-1]; + if (nrm2k > eps*eps * nrm2) return k; + } + throw std::runtime_error("rank cannot be larger than nnodes"); + return 0; + } + + + /** Constructor */ + SymmetryHandler(const MatrixKernelClass *const MatrixKernel, const double Epsilon) + { + // init all 343 item to zero, because effectively only 16 exist + for (unsigned int t=0; t<343; ++t) { + K[t] = nullptr; + LowRank[t] = 0; + } + + // set permutation vector and indices + const FInterpSymmetries<ORDER> Symmetries; + for (int i=-3; i<=3; ++i) + for (int j=-3; j<=3; ++j) + for (int k=-3; k<=3; ++k) + if (abs(i)>1 || abs(j)>1 || abs(k)>1) { + const unsigned int idx = ((i+3) * 7 + (j+3)) * 7 + (k+3); + pindices[idx] = Symmetries.getPermutationArrayAndIndex(i,j,k, pvectors[idx]); + } + + // precompute 16 M2L operators + this->precomputeSVD(MatrixKernel, Epsilon); + } + + + + /** Destructor */ + ~SymmetryHandler() + { + for (unsigned int t=0; t<343; ++t) + if (K[ t]!=nullptr) delete [] K[ t]; + } + + + +private: + void precomputeSVD(const MatrixKernelClass *const MatrixKernel, const double Epsilon) + { + // interpolation points of source (Y) and target (X) cell + FPoint<FReal> X[nnodes], Y[nnodes]; + // set roots of target cell (X) + FChebTensor<FReal, 
ORDER>::setRoots(FPoint<FReal>(0.,0.,0.), FReal(2.), X); + // temporary matrix + FReal* U = new FReal [nnodes*nnodes]; + + // needed for the SVD + const unsigned int LWORK = 2 * (3*nnodes + nnodes); + FReal *const WORK = new FReal [LWORK]; + FReal *const VT = new FReal [nnodes*nnodes]; + FReal *const S = new FReal [nnodes]; + + unsigned int counter = 0; + for (int i=2; i<=3; ++i) { + for (int j=0; j<=i; ++j) { + for (int k=0; k<=j; ++k) { + + // assemble matrix + const FPoint<FReal> cy(FReal(2.*i), FReal(2.*j), FReal(2.*k)); + FChebTensor<FReal, ORDER>::setRoots(cy, FReal(2.), Y); + for (unsigned int n=0; n<nnodes; ++n) + for (unsigned int m=0; m<nnodes; ++m) + U[n*nnodes + m] = MatrixKernel->evaluate(X[m], Y[n]); + + // applying weights //////////////////////////////////////// + FReal weights[nnodes]; + FChebTensor<FReal,ORDER>::setRootOfWeights(weights); + for (unsigned int n=0; n<nnodes; ++n) { + FBlas::scal(nnodes, weights[n], U + n, nnodes); // scale rows + FBlas::scal(nnodes, weights[n], U + n * nnodes); // scale cols + } + ////////////////////////////////////////////////////////// + + // truncated singular value decomposition of matrix + const unsigned int info = FBlas::gesvd(nnodes, nnodes, U, S, VT, nnodes, LWORK, WORK); + if (info!=0) throw std::runtime_error("SVD did not converge with " + info); + const unsigned int rank = this->getRank(S, Epsilon); + + // store + const unsigned int idx = (i+3)*7*7 + (j+3)*7 + (k+3); + assert(K[idx]==nullptr); + K[idx] = new FReal [2*rank*nnodes]; + LowRank[idx] = rank; + for (unsigned int r=0; r<rank; ++r) + FBlas::scal(nnodes, S[r], U + r*nnodes); + FBlas::copy(rank*nnodes, U, K[idx]); + for (unsigned int r=0; r<rank; ++r) + FBlas::copy(nnodes, VT + r, nnodes, K[idx] + rank*nnodes + r*nnodes, 1); + + // un-weighting //////////////////////////////////////////// + for (unsigned int n=0; n<nnodes; ++n) { + // scale rows + FBlas::scal(rank, FReal(1.) 
/ weights[n], K[idx] + n, nnodes); + // scale rows + FBlas::scal(rank, FReal(1.) / weights[n], K[idx] + rank*nnodes + n, nnodes); + } + ////////////////////////////////////////////////////////// + + //std::cout << "(" << i << "," << j << "," << k << ") " << idx << + // ", low rank = " << rank << std::endl; + + counter++; + } + } + } + std::cout << "num interactions = " << counter << std::endl; + delete [] U; + delete [] WORK; + delete [] VT; + delete [] S; + } + +}; + + +#endif + +// [--END--] diff --git a/Src/BalanceTree/FCoordColour.hpp b/Src/BalanceTree/FCoordColour.hpp new file mode 100644 index 0000000000000000000000000000000000000000..a96a8351e664cb61813c5180073d767dfd2edc05 --- /dev/null +++ b/Src/BalanceTree/FCoordColour.hpp @@ -0,0 +1,20 @@ +// ==== CMAKE ==== +// Keep in private GIT +// @SCALFMM_PRIVATE + +#ifndef _FCOORDCOLOUR_HPP_ +#define _FCOORDCOLOUR_HPP_ + +class FCoordColour { + +public: + enum {range = 3*3*3}; + + static int coord2colour(const FTreeCoordinate& coord) { + return (coord.getX() % 3) * 9 + + (coord.getY() % 3) * 3 + + (coord.getZ() % 3); + } +}; + +#endif diff --git a/Src/BalanceTree/FCostCell.hpp b/Src/BalanceTree/FCostCell.hpp new file mode 100644 index 0000000000000000000000000000000000000000..3cf9cb862b75e5fb593737b9351689f0e4d4c0bf --- /dev/null +++ b/Src/BalanceTree/FCostCell.hpp @@ -0,0 +1,104 @@ +// ==== CMAKE ==== +// Keep in private GIT +// @SCALFMM_PRIVATE + +#ifndef _FCOSTCELL_HPP_ +#define _FCOSTCELL_HPP_ + +#include <type_traits> + + +/** + * \brief Empty trait class. + * \author Quentin Khan + * + * This class is used to check whether a cell class has FCostCell in its + * inheritance tree. + */ +class FCostCellTypeTrait {}; + + +/** + * \brief Cell with a cost memory for balance computations. + * \author Quentin Khan + * + * This class extends BaseClass to add simple computation cost memory. + * + * \tparam BaseClass The base cell class to extend. The constructors are + * inherited automatically. 
+ * \tparam CostType The type to use in order to store the cost. Defaults to FSize. + */ +template<typename BaseClass, typename CostType = FSize> +class FCostCell : public BaseClass, virtual public FCostCellTypeTrait { + static_assert(std::is_arithmetic<CostType>::value, + "The cell cost type must be an arithmetic type."); + + /// The far-field cost of the cell. + /** Declared mutable because the existing algorithms use const cells.*/ + mutable CostType _cost = 0; + + /// The near-field cost of the cell. + /** Declared mutable because the existing algorithms use const cells.*/ + mutable CostType _leafCost = 0; + +public: + /// Type definition that can be retrieved by other classes + using costtype = CostType; + + using BaseClass::BaseClass; + + /// Debug member, used to check whether the cell was already visited. + bool _visited = false; + + /** + * \brief Gets the far-field cost of the cell. + * \return The far-field cost of the cell + */ + CostType getCost() const { + return _cost; + } + + /** + * \brief Gets the near-field cost of the cell. + * \return The near-field cost of the cell + */ + CostType getNearCost() const { + return _leafCost; + } + + /** + * \brief Sets the cost of the cell. + */ + void setCost(CostType newCost) { + _cost = newCost; + } + + /** + * \brief Sets the near-field cost of the cell. + */ + void setNearCost(CostType newCost) { + _leafCost = newCost; + } + + /** + * \brief Add a far-field cost to the cell. + * \return The cost of the cell + * \warning Can be used on const cells ! + */ + CostType addCost(CostType cost) const { + _cost += cost; + return _cost; + } + + /** + * \brief Add a near-field cost to the cell. + * \return The cost of the cell + * \warning Can be used on const cells ! 
+ */ + CostType addNearCost(CostType cost) const { + _leafCost += cost; + return _leafCost; + } +}; + +#endif diff --git a/Src/BalanceTree/FCostZones.hpp b/Src/BalanceTree/FCostZones.hpp new file mode 100644 index 0000000000000000000000000000000000000000..3600245ab282b1a0ff19c232a5d0c2c23b530940 --- /dev/null +++ b/Src/BalanceTree/FCostZones.hpp @@ -0,0 +1,455 @@ +// ==== CMAKE ==== +// Keep in private GIT +// @SCALFMM_PRIVATE + +#ifndef _COSTZONES_HPP_ +#define _COSTZONES_HPP_ + +#include "FCostCell.hpp" +#include "FCoordColour.hpp" + +#include <vector> +#include <stdexcept> +#include <sstream> + +/** + * \brief The costzones algorithm implementation. + * \author Quentin Khan <quentin.khan@inria.fr> + * + * This class is an implementation of the costzones algorithm described in "A + * Parallel Adaptive Fast Multipole Method" (1993). The algorithm consists in an + * in-order traversal of the octree where cell costs are accumulated. When an + * accumulation is too big, a new zone is created. + * + * It is possible to set the levels of the tree that must be considered to + * compute the costzones. + * + * \tparam OctreeClass The type of the octree to work on. + * \tparam CellClass The type of the cells we work with. + * \parblock + * This class must provide a typedef named CostType that identifies + * the structure used to store data. + * \endparblock + */ +template<typename OctreeClass, typename CellClass> +class FCostZones { +public: + using CostType = typename CellClass::costtype; + + /** + * \brief Class used to store the bounds of a zone. + * The bounds consist in the Morton index of the first node and the number + * of subsequent nodes. + */ + using BoundClass = std::pair<MortonIndex, int>; + /// Initial value for empty bounds. 
+    const BoundClass _boundInit {-1,0};
+
+
+protected:
+    /// Alias to FCoordColour#coord2colour -*- not optimised
+    constexpr static int(*coord2colour)(const FTreeCoordinate&)
+    = FCoordColour::coord2colour;
+
+    /**
+     * \brief Enumeration to specify the children to move to during the in-order
+     * traversal.
+     */
+    enum ChildrenSide {LEFT, RIGHT};
+
+
+    /// The iterator to move through the tree.
+    typename OctreeClass::Iterator _it;
+    /// The number of zones to create.
+    int _nbZones;
+    /// The tree height.
+    int _treeHeight;
+    /// The highest level in the tree in which to consider cells (inclusive)
+    int _topMostLevel = 0;
+    /// The lowest level in the tree in which to consider cells (exclusive).
+    /// The constructor replaces this default with the tree height.
+    int _bottomMostLevel = 1;
+
+
+    /// The current cumulative cost of visited cells.
+    CostType _internalCurrentCost = 0;
+    /// The total cost of internal cell.
+    CostType _internalTotalCost = 0;
+    /**
+     * \brief The vector containing the boundaries of the internal node zones
+     *
+     * Sorted by zone > level > bounds.
+     * \details This means there are _treeHeight elements in the inner vectors.
+     */
+    std::vector< std::vector< BoundClass > > _internalZoneBounds;
+
+
+    /// The current cumulative cost of the visited leaves (one entry per colour).
+    std::vector<CostType> _leafCurrentCost;
+    /// The total cost of the leaves (one entry per colour).
+    std::vector<CostType> _leafTotalCost;
+    /**
+     * \brief The vector containing the boundaries of the leaf zones
+     *
+     * Sorted by zone > colour > bounds.
+     * \details This means there are FCoordColour::range elements in the inner
+     * vectors.
+     */
+    std::vector< std::vector< BoundClass > > _leafZoneBounds;
+
+    /// The vector containing the costzone cells: one list of (level, cell)
+    /// pairs per zone, in the order the cells were added.
+    std::vector< std::vector< std::pair<int, CellClass*> > > _zones;
+
+
+
+public:
+
+    /**
+     * \brief Constructor
+     * \param tree The tree to work on.
+     * \param nbZones The number of zones to create.
+     */
+    FCostZones(OctreeClass* tree, int nbZones) :
+        _it( tree ),
+        _nbZones( nbZones ),
+        _treeHeight( tree->getHeight() ),
+        _bottomMostLevel(_treeHeight),
+        _internalZoneBounds(
+            _nbZones,
+            std::vector< BoundClass >(_treeHeight, _boundInit )),
+        _leafCurrentCost( FCoordColour::range, 0),
+        _leafTotalCost( FCoordColour::range, 0),
+        _leafZoneBounds(
+            _nbZones,
+            std::vector< BoundClass >(FCoordColour::range, _boundInit)),
+        _zones(
+            _nbZones,
+            std::vector< std::pair< int, CellClass*> >( ))
+    {
+        // Start from the first leaf; the unused second iterator that was
+        // built here has been dropped.
+        _it.gotoBottomLeft();
+    }
+
+    /**
+     * \brief Gets the computed zones.
+     *
+     * See #_zones.
+     * \return The computed zones.
+     */
+    const std::vector< std::vector< std::pair<int, CellClass*> > >& getZones() const {
+        return _zones;
+    }
+
+    /**
+     * \brief Gets the computed internal zone bounds.
+     *
+     * See #_internalZoneBounds.
+     * \return The computed zone bounds.
+     */
+    const std::vector< std::vector< BoundClass > >& getZoneBounds() const {
+        return _internalZoneBounds;
+    }
+
+    /**
+     * \brief Gets the computed leaf zone bounds.
+     *
+     * See #_leafZoneBounds.
+     * \return The computed zone bounds.
+     */
+    const std::vector< std::vector< BoundClass > >& getLeafZoneBounds() const {
+        return _leafZoneBounds;
+    }
+
+    /// Gets the tree topmost level used.
+    int getTopMostLevel() const {
+        return _topMostLevel;
+    }
+
+    /**
+     * \brief Sets the tree topmost level used.
+     * \param level The new topmost level, at most the tree height minus one.
+     * \throws std::out_of_range if \p level is deeper than the last tree level.
+     */
+    void setTopMostLevel(unsigned int level) {
+        // Compare in a signed type wide enough for both operands, so that
+        // _treeHeight - 1 is never converted to unsigned.
+        if( static_cast<long long>(level) > static_cast<long long>(_treeHeight) - 1 ) {
+            std::stringstream msgstream;
+            msgstream << __FUNCTION__ << ": level is to deep. level=" << level
+                      << " tree height=" << _treeHeight;
+            throw std::out_of_range(msgstream.str());
+        }
+
+        _topMostLevel = static_cast<int>(level);
+    }
+
+    /// Gets the tree bottom most level that we use.
+    int getBottomMostLevel() const {
+        return _bottomMostLevel;
+    }
+
+    /// Sets the tree bottom most level that we use.
+ void setBottomMostLevel(unsigned int level) { + if( level > _treeHeight-1 ) { + std::stringstream msgstream; + msgstream << __FUNCTION__ << ": level is to deep. level=" << level + << " tree height=" << _treeHeight; + throw std::out_of_range(msgstream.str()); + } + + _bottomMostLevel = level; + } + + + /** + * \brief Runs the costzones algorithm. + */ + void run() { + + + // Compute tree leaves total cost; + computeLeavesCost(); + // Compute tree internal nodes total cost; + computeInternalCost(); + + // Count the root's children (the root is not stored in the tree) + _it.gotoTop(); + _it.gotoLeft(); + do { + this->costzones(); + } while( _it.moveRight()); + + // int nbRootChildren = 0; + // do { + // nbRootChildren++; + // } while(_it.moveRight()); + + // _it.gotoLeft(); + // // Compute costzones, we have to do the first level manualy + // for ( int i = 0; i < nbRootChildren; i++ ) { + // this->costzones(); + // _it.moveRight(); + // } + + } + +protected: + + /** + * \brief Main costzone algorithm. + * + * Moves through the tree in-order and assigns each cell to a zone. When a + * zone's cumulative cost is too high, the new cells are insterted in the + * next one. + */ + void costzones() { + + std::pair<int,int> childrenCount; + const int level = _it.level(); + const bool progressDown = _it.canProgressToDown() + && (level < _bottomMostLevel); + const bool useCell = (level < _bottomMostLevel) + && (level >= _topMostLevel); + + // When not on a leaf, apply to left children first + if ( progressDown ) { + childrenCount = countLeftRightChildren(); + callCostZonesOnChildren(LEFT, childrenCount); + } + + if( useCell ) + addCurrentCell(); + + // When not on a leaf, apply to right children + if ( progressDown ) { + callCostZonesOnChildren(RIGHT, childrenCount); + } + + } + + + /** + * \brief Applies costzones to the left or right children of the current cell. + * + * The current cell is the one currently pointed at by the iterator _it. 
+ * + * \warning You must check by yourself whether the cell is a leaf or not. + * + * \param side The children side we want to visit. + * \param childrenCount The children count as returned by + * countLeftRightChildren. + */ + void callCostZonesOnChildren(const ChildrenSide side, const std::pair<int, int>& childrenCount) { + + const int& nbChildren = (side == LEFT ? childrenCount.first : childrenCount.second); + + // Don't move if there are no children on the right when we want to + // visit them. We test this before moving in case one day moving in the + // tree becomes expensive. + if ( side == RIGHT && childrenCount.second == 0) + return; + + // move down to the children level + _it.moveDown(); + + if ( side == RIGHT ) { + // move to the first right child + for ( int childIdx = 0; childIdx < childrenCount.first; childIdx++) { + _it.moveRight(); + } + } + + // Call costzones + for ( int childIdx = 0; childIdx < nbChildren; childIdx++ ) { + this->costzones(); + if(childIdx < nbChildren -1) // nbChildren-1 to avoid changing tree + _it.moveRight(); + } + + // move up to the cell level + _it.moveUp(); + + } + + + /** + * \brief Adds the current cell to a zone. + * + * The choice of the zone is made according to the current cost accumulation + * compared to the mean cost of a zone (_totalCost/_nbZones +1). 
+ * + * This method uses the following attributes to choose the zone into which + * the current cell must be stored : + * + * - #_internalCurrentCost + * - #_leafCurrentCost + * - #_internalTotalCost + * - #_leafTotalCost + * - #_nbZones + */ + void addCurrentCell() { + + const int& level = _it.level(); + CellClass* cell = _it.getCurrentCell(); + CostType cellCost = cell->getCost(); + bool isLeaf = (level == _treeHeight -1); + + if ( 0 == cellCost ) { + return; + } + + // find cell zone + long long int cellZone = 0; + + // Near-field zone ///////////////// + if( isLeaf ) { + CostType leafCost = cell->getNearCost(); + int colour = coord2colour(_it.getCurrentGlobalCoordinate()); + + cellZone = _leafCurrentCost[colour] * _nbZones / (_leafTotalCost[colour]+1); + _leafCurrentCost[colour] += leafCost; + + if( _leafZoneBounds.at(cellZone)[colour] == _boundInit ) { + _leafZoneBounds.at(cellZone)[colour].first = _it.getCurrentGlobalIndex(); + _leafZoneBounds.at(cellZone)[colour].second = 1; + } else { + _leafZoneBounds.at(cellZone)[colour].second++; + } + } + //////////////////////////////////// + + // Far-field zone ////////////////// + cellZone = _internalCurrentCost * _nbZones / (_internalTotalCost+1); + + _internalCurrentCost += cellCost; + + if( _boundInit == _internalZoneBounds.at(cellZone)[level] ) { + _internalZoneBounds.at(cellZone)[level].first = + _it.getCurrentGlobalIndex(); + _internalZoneBounds.at(cellZone)[level].second = 1; + } else { + _internalZoneBounds.at(cellZone)[level].second++; + } + /////////////////////////////// + + // add cell to exhaustive zone vector + (_zones.at(cellZone)).emplace_back(level, cell); + } + + + /** + * \brief Computes and stores the leaves' total cost. + * + * The tree itertor (#_it) is moved to the bottom level of the + * tree by this method. After the method returns, the iterator is left at + * the rightmost leaf. + */ + void computeLeavesCost() { + + // Reset colour costs. 
+ for( CostType& colourCost : _leafTotalCost ) { + colourCost = 0; + } + + _it.gotoBottomLeft(); + do { + int leafColour = coord2colour(_it.getCurrentGlobalCoordinate()); + _leafTotalCost[leafColour] += _it.getCurrentCell()->getNearCost(); + } while(_it.moveRight()); + + } + + /** + * \brief Computes and stores the internal cells' total cost. + * \warning This method makes use of + * + * The tree itertor (#_it) is moved to the bottom level of the + * tree by this method. After the method returns, the iterator is left at + * the rightmost leaf. + */ + void computeInternalCost() { + _it.gotoBottomLeft(); + //_it.moveUp(); + + while( _it.level() >= _bottomMostLevel ) { + _it.moveUp(); + } + + do { + _it.gotoLeft(); + do { + _internalTotalCost += _it.getCurrentCell()->getCost(); + } while(_it.moveRight()); + } while(_it.moveUp()); + + } + + /** + * \brief Counts the left and right children of the current cell. + * + * The current cell is the one currently pointed at by the iterator _it. + * + * \warning It must be checked whether the current cell is a leaf or not + * before calling this method. + * + * \return A pair of int containing the count of left (first) and right + * (second) children. 
+ */ + std::pair<int,int> countLeftRightChildren() { + CellClass** children = _it.getCurrentChildren(); + int nbLeftChildren = 0, nbRightChildren = 0; + // Left children + for ( int childIdx = 0; childIdx < 4; childIdx++) { + if ( children[childIdx] != nullptr ) { + ++ nbLeftChildren; + } + } + // Right children + for ( int childIdx = 4; childIdx < 8; childIdx++) { + if ( children[childIdx] != nullptr) { + ++ nbRightChildren; + } + } + + return std::pair<int,int> (nbLeftChildren, nbRightChildren); + } + + + +}; + +#endif diff --git a/Src/BalanceTree/FFmmAlgorithmThreadBalanced.hpp b/Src/BalanceTree/FFmmAlgorithmThreadBalanced.hpp new file mode 100644 index 0000000000000000000000000000000000000000..01438fc3428951ace07dc86f30ba6471a1e49df3 --- /dev/null +++ b/Src/BalanceTree/FFmmAlgorithmThreadBalanced.hpp @@ -0,0 +1,595 @@ +// =================================================================================== +// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Berenger Bramas, Matthias Messner +// olivier.coulaud@inria.fr, berenger.bramas@inria.fr +// This software is a computer program whose purpose is to compute the FMM. +// +// This software is governed by the CeCILL-C and LGPL licenses and +// abiding by the rules of distribution of free software. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public and CeCILL-C Licenses for more details. +// "http://www.cecill.info". +// "http://www.gnu.org/licenses". 
+// =================================================================================== + +// ==== CMAKE ==== +// Keep in private GIT +// @SCALFMM_PRIVATE + + + +#ifndef FFMMALGORITHMTHREADBALANCED_HPP +#define FFMMALGORITHMTHREADBALANCED_HPP + + +#include "../Src/Utils/FAssert.hpp" +#include "../Src/Utils/FLog.hpp" + +#include "../Src/Utils/FTic.hpp" +#include "../Src/Utils/FGlobal.hpp" +#include "../Src/Utils/FAlgorithmTimers.hpp" + +#include "../Src/Containers/FOctree.hpp" + +#include "../Src/Core/FCoreCommon.hpp" + +#include "../Src/BalanceTree/FCoordColour.hpp" + +#include <vector> + +#include <omp.h> + +/** + * \brief Implements a threaded FMM algorithm using OpenMP. + * + * \author Quentin Khan, original file: Berenger Bramas <berenger.bramas@inria.fr> + * \copyright Please read the license. + * + * This class runs a threaded FMM algorithm. It just iterates on a tree and call + * the kernels with good arguments. The inspector-executor model is used : the + * class iterates on the tree and builds an array and works in parallel on this + * array. + * + * This algorithm uses the P2P in a thread safe manner, even if the kernel does + * not initially take care of it. When working on a leaf, a kernel may want to + * write to the leaf direct neighbours. To avoid any concurrent write, we use 27 + * colours (which is the maximum number of neighbours for a point in a 3D grid). + * All leaves of a given colour are separated by a least 2 leaves. This means + * that all threads can work on the same colour at the same time. + * + * For example, in 2D, one would have a grid looking like the following, where + * each number represents a coloured cell. The grid has been cut to work on + * cells which have a colour value of 4. + * + * 0 1 2 | 0 1 2 + * 3 4 5 | 3 4 5 + * 6 7 8 | 6 7 8 + * ------+------ + * 0 1 2 | 0 1 2 + * 3 4 5 | 3 4 5 + * 6 7 8 | 6 7 8 + * + * \note Upon destruction, this class does not free pointers given to its constructor. 
+ */ +template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass> +class FFmmAlgorithmThreadBalanced : public FAbstractAlgorithm, public FAlgorithmTimers{ + + /// Shortened tree iterator class. + using TreeIterator = typename OctreeClass::Iterator; + /// Factorisation of the class holding the zone bounds. + using ZoneBoundClass = std::pair<MortonIndex, int>; + + OctreeClass* const tree; ///< The octree to work on. + KernelClass** kernels; ///< The kernels. + + //static const int SizeColour = 3*3*3; ///< Leaf colours count, see. + + const int MaxThreads; ///< The maximum number of threads. + const int OctreeHeight; ///< The height of the given tree. + + /// The vector containing the internal cost zones + const std::vector<std::vector<ZoneBoundClass>>& costzones; + /// The vector containing the leaf cost zones + const std::vector<std::vector<ZoneBoundClass>>& leafcostzones; + +public: + /** + * \brief Class constructor + * + * The constructor needs the octree and the kernel used for computation. + * + * \warning Internally, one kernel is built for each thread, and each works + * on its own copy. This means the kernel cannot assume anything about the + * parts of the tree it will be executed on. + * + * \param inTree The octree to work on. + * \param inKernel The kernel to call. + * \param internalCostzones,leafCostzones The cost zones for each thread. + * + * \except An exception is thrown if one of the arguments is NULL. + * \except An assertion checks that the number of threads is the same as the + * number of zones. 
+ */ + FFmmAlgorithmThreadBalanced( + OctreeClass* const inTree, + KernelClass* const inKernel, + const std::vector<std::vector<ZoneBoundClass>>& internalCostzones, + const std::vector<std::vector<ZoneBoundClass>>& leafCostzones) : + tree(inTree) , + kernels(nullptr), + MaxThreads(omp_get_max_threads()), + OctreeHeight(tree->getHeight()), + costzones(internalCostzones), + leafcostzones(leafCostzones) { + + FAssertLF(tree, "Tree cannot be null."); + FAssertLF(internalCostzones.size() == static_cast<unsigned int>(MaxThreads), + std::string("Thread count is different from cost zone count (") + + std::to_string(MaxThreads) + + std::string(" : ") + + std::to_string(internalCostzones.size()) + + ")." + ); + + this->kernels = new KernelClass*[MaxThreads]; + + #pragma omp parallel for schedule(static) + for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread) { + #pragma omp critical (InitFFmmAlgorithmThreadBalanced) + { + this->kernels[idxThread] = new KernelClass(*inKernel); + } + } + + FAbstractAlgorithm::setNbLevelsInTree(OctreeHeight); + + FLOG(FLog::Controller << "FFmmAlgorithmThreadBalanced (Max Thread " << MaxThreads << ")\n"); + } + + /** \brief Destructor, frees the per-thread kernel copies */ + virtual ~FFmmAlgorithmThreadBalanced(){ + for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){ + delete this->kernels[idxThread]; + } + delete [] this->kernels; + } + +protected: + /** + * \brief Runs the complete algorithm. + * + * \param operationsToProceed A flag combination to specify the operators + * to use. See FFmmOperations in FCoreCommon.hpp. 
+ */ + void executeCore(const unsigned operationsToProceed) override { + + Timers[P2MTimer].tic(); + if(operationsToProceed & FFmmP2M) + bottomPass(); + Timers[P2MTimer].tac(); + + Timers[M2MTimer].tic(); + if(operationsToProceed & FFmmM2M) + upwardPass(); + Timers[M2MTimer].tac(); + + Timers[M2LTimer].tic(); + if(operationsToProceed & FFmmM2L) + transferPass(); + Timers[M2LTimer].tac(); + + Timers[L2LTimer].tic(); + if(operationsToProceed & FFmmL2L) + downardPass(); + Timers[L2LTimer].tac(); + + Timers[NearTimer].tic(); + if( (operationsToProceed & FFmmP2P) || (operationsToProceed & FFmmL2P) ) + directPass((operationsToProceed & FFmmP2P),(operationsToProceed & FFmmL2P)); + Timers[NearTimer].tac(); + + } + + ///////////////////////////////////////////////////////////////////////////// + // P2M + ///////////////////////////////////////////////////////////////////////////// + + /** \brief Runs the P2M kernel. */ + void bottomPass(){ + FLOG( FLog::Controller.write("\tStart Bottom Pass\n").write(FLog::Flush) ); + FLOG( FTic counterTime ); + + TreeIterator octreeIterator(tree); + octreeIterator.gotoBottomLeft(); + + // One pair per zone. + std::vector< std::pair<TreeIterator, int> > iterVector(0); + + /* Find iterators to leaf portion of each zone. + + Since we do not care about the leaves' colour here, we use the number + of leaves that each zone has for each colour and add them to count + the total number of leaves for a zone. + + The zones are calculated sequentially, we use the same iterator and + save its position when we change zone. 
+ */ + for( std::vector<ZoneBoundClass> zone : leafcostzones ) { + int nbCells = 0; + for( ZoneBoundClass bounds : zone ) { + nbCells += bounds.second; + } + + iterVector.push_back( + std::pair<TreeIterator,int>( + octreeIterator, // Iterator to the current cell + nbCells)); // Cell count in zone + + // Move iterator to end of zone (which is the first of the next zone) + for( int idx = 0; idx < nbCells; idx++) { + octreeIterator.moveRight(); + } + } + + FLOG( FTic computationCounter ); + + #pragma omp parallel + { + const int threadIdx = omp_get_thread_num(); + KernelClass * const myThreadkernels = kernels[threadIdx]; + TreeIterator zoneIterator = iterVector.at(threadIdx).first; + int zoneCellCount = iterVector[threadIdx].second; + + // Call P2M on cells + while ( zoneCellCount-- > 0 ) { + myThreadkernels->P2M(zoneIterator.getCurrentCell(), // Cell + zoneIterator.getCurrentListSrc()); // Particles + zoneIterator.moveRight(); + } + } + + FLOG( computationCounter.tac() ); + + FLOG( FLog::Controller << "\tFinished (@Bottom Pass (P2M) = " + << counterTime.tacAndElapsed() << "s)\n" ); + FLOG( FLog::Controller << "\t\t Computation : " + << computationCounter.elapsed() << " s\n" ); + + } + + ///////////////////////////////////////////////////////////////////////////// + // Upward + ///////////////////////////////////////////////////////////////////////////// + + /** \brief Runs the M2M kernel. */ + void upwardPass() { + FLOG( FLog::Controller.write("\tStart Upward Pass\n").write(FLog::Flush); ); + FLOG( FTic counterTime ); + FLOG( FTic computationCounter ); + + // Start from leaf level - 1 + TreeIterator octreeIterator(tree); + octreeIterator.gotoBottomLeft(); + octreeIterator.moveUp(); // Avoid leaf level + + while( octreeIterator.level() > FAbstractAlgorithm::lowerWorkingLevel-1 ) { + octreeIterator.moveUp(); + } + + // Stores the iterators to the beginning of zones *per level* in REVERSE order! + // ie. 
levels in REVERSE ORDER > zones > (iterator,cell_count) + std::vector< std::vector< std::pair<TreeIterator, int> > > + reverseLevelIterVector(0); + + while( octreeIterator.level() >= FAbstractAlgorithm::upperWorkingLevel ) + { + int idxLevel = octreeIterator.level(); + std::vector< std::pair<TreeIterator, int> > levelVect; + // Find iterators to leaf portion of each zone. + for( std::vector<ZoneBoundClass> zone : costzones ) { + + levelVect.push_back( + std::pair<TreeIterator,int>( + octreeIterator, // Iterator to the current cell + zone[idxLevel].second)); // Cell count in zone + + // Get iterator to end of zone (which is the first of the next zone) + for( int idx = 0; idx < zone[idxLevel].second; idx++) { + octreeIterator.moveRight(); + } + + } + reverseLevelIterVector.emplace_back(levelVect); + + octreeIterator.moveUp(); + octreeIterator.gotoLeft(); + } + + // for each level from bottom to top + for( std::vector< std::pair<TreeIterator, int> > levelIterVector : + reverseLevelIterVector ) { + + FLOG(FTic counterTimeLevel); + FLOG(computationCounter.tic()); + #pragma omp parallel + { + const int threadNum = omp_get_thread_num(); + KernelClass * const myThreadkernels = kernels[threadNum]; + TreeIterator zoneIterator = levelIterVector[threadNum].first; + int zoneCellCount = levelIterVector[threadNum].second; + + while(zoneCellCount-- > 0) { + // We need the current cell and the child + // child is an array (of 8 child) that may be null + myThreadkernels->M2M( zoneIterator.getCurrentCell(), + zoneIterator.getCurrentChild(), + zoneIterator.level()); + zoneIterator.moveRight(); + } + + } + + FLOG(computationCounter.tac()); + FLOG( FLog::Controller << "\t\t>> Level " << octreeIterator.level() + << " = " << counterTimeLevel.tacAndElapsed() + << "s\n" ); + } + + FLOG( FLog::Controller << "\tFinished (@Upward Pass (M2M) = " + << counterTime.tacAndElapsed() << "s)\n" ); + FLOG( FLog::Controller << "\t\t Computation : " + << computationCounter.cumulated() << "s\n" ); + } + + 
///////////////////////////////////////////////////////////////////////////// + // Transfer + ///////////////////////////////////////////////////////////////////////////// + + /** \brief Runs the M2L kernel. */ + void transferPass(){ + + FLOG( FLog::Controller.write("\tStart Downward Pass (M2L)\n").write(FLog::Flush); ); + FLOG(FTic counterTime); + FLOG(FTic computationCounter); + + TreeIterator octreeIterator(tree); + octreeIterator.moveDown(); + + for(int idxLevel = 2 ; idxLevel < FAbstractAlgorithm::upperWorkingLevel ; ++idxLevel){ + octreeIterator.moveDown(); + } + + + // for each levels + for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; + idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; + ++idxLevel ) + { + FLOG(FTic counterTimeLevel); + + std::vector< std::pair<TreeIterator, int> > iterVector; + // Find iterators to leaf portion of each zone. + for( std::vector<ZoneBoundClass> zone : costzones ) { + iterVector.push_back( + std::pair<TreeIterator,int>( + octreeIterator, // Iterator to the current cell + zone[idxLevel].second)); // Cell count in zone + // Get iterator to end of zone (which is the first of the next zone) + for( int idx = 0; idx < zone[idxLevel].second; idx++) { + octreeIterator.moveRight(); + } + + } + + octreeIterator.moveDown(); + octreeIterator.gotoLeft(); + + FLOG(computationCounter.tic()); + + #pragma omp parallel + { + const int threadNum = omp_get_thread_num(); + KernelClass * const myThreadkernels = kernels[threadNum]; + const CellClass* neighbours[343]; + TreeIterator zoneIterator = iterVector[threadNum].first; + int zoneCellCount = iterVector[threadNum].second; + + while(zoneCellCount-- > 0) { + const int counter = + tree->getInteractionNeighbors( + neighbours, + zoneIterator.getCurrentGlobalCoordinate(), + idxLevel); + if(counter) + myThreadkernels->M2L( + zoneIterator.getCurrentCell(), + neighbours, + counter, + idxLevel); + zoneIterator.moveRight(); + } + + myThreadkernels->finishedLevelM2L(idxLevel); + + } + + FLOG( 
computationCounter.tac() ); + FLOG( FLog::Controller << "\t\t>> Level " + << idxLevel << " = " + << counterTimeLevel.tacAndElapsed() + << "s\n" ); + } + + FLOG( FLog::Controller << "\tFinished (@Downward Pass (M2L) = " + << counterTime.tacAndElapsed() << "s)\n" ); + FLOG( FLog::Controller << "\t\t Computation : " + << computationCounter.cumulated() << " s\n" ); + } + + ///////////////////////////////////////////////////////////////////////////// + // Downward + ///////////////////////////////////////////////////////////////////////////// + + /** \brief Runs the L2L kernel. */ + void downardPass(){ + + FLOG( FLog::Controller.write("\tStart Downward Pass (L2L)\n").write(FLog::Flush); ); + FLOG(FTic counterTime); + FLOG(FTic computationCounter); + + TreeIterator octreeIterator(tree); + octreeIterator.moveDown(); + + for(int idxLevel = 2 ; idxLevel < FAbstractAlgorithm::upperWorkingLevel ; ++idxLevel){ + octreeIterator.moveDown(); + } + + const int heightMinusOne = FAbstractAlgorithm::lowerWorkingLevel - 1; + // for each levels excepted leaf level + for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; + idxLevel < heightMinusOne ; + ++idxLevel ) + { + FLOG(FTic counterTimeLevel); + + std::vector< std::pair<TreeIterator, int> > iterVector; + // Find iterators to leaf portion of each zone. 
+ for( std::vector<ZoneBoundClass> zone : costzones ) { + iterVector.push_back( + std::pair<TreeIterator,int>( + octreeIterator, // Iterator to the current cell + zone[idxLevel].second)); // Cell count in zone + // Get iterator to end of zone (which is the first of the next zone) + for( int idx = 0; idx < zone[idxLevel].second; idx++) { + octreeIterator.moveRight(); + } + } + octreeIterator.gotoLeft(); + octreeIterator.moveDown(); + + FLOG(computationCounter.tic()); + + #pragma omp parallel + { + const int threadNum = omp_get_thread_num(); + KernelClass * const myThreadkernels = kernels[threadNum]; + TreeIterator zoneIterator = iterVector[threadNum].first; + int zoneCellCount = iterVector[threadNum].second; + + while( zoneCellCount-- > 0 ) { + myThreadkernels->L2L( + zoneIterator.getCurrentCell(), + zoneIterator.getCurrentChild(), + idxLevel); + zoneIterator.moveRight(); + } + } + + FLOG(computationCounter.tac()); + FLOG( FLog::Controller << "\t\t>> Level " << idxLevel << " = " << counterTimeLevel.tacAndElapsed() << "s\n" ); + } + + FLOG( FLog::Controller << "\tFinished (@Downward Pass (L2L) = " << counterTime.tacAndElapsed() << "s)\n" ); + FLOG( FLog::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" ); + } + + + + ///////////////////////////////////////////////////////////////////////////// + // Direct + ///////////////////////////////////////////////////////////////////////////// + + /** + * \brief Runs the P2P & L2P kernels. + * + * \param p2pEnabled Run the P2P kernel. + * \param l2pEnabled Run the L2P kernel. 
+ */ + void directPass(const bool p2pEnabled, const bool l2pEnabled){ + FLOG( FLog::Controller.write("\tStart Direct Pass\n").write(FLog::Flush); ); + FLOG( FTic counterTime ); + FLOG( FTic computationCounter ); + FLOG( FTic computationCounterP2P ); + + + struct LeafData { + MortonIndex index; + CellClass* cell; + ContainerClass* targets; + ContainerClass* sources; + }; + + const int leafLevel = OctreeHeight - 1; + #pragma omp parallel + { + const int threadIdx = omp_get_thread_num(); + + ContainerClass* neighbours[27]; + KernelClass& myThreadkernel = (*kernels[threadIdx]); + TreeIterator it(tree); + + for( int colourIdx = 0; colourIdx < FCoordColour::range; colourIdx++) { + it.gotoBottomLeft(); + + const MortonIndex startIdx = leafcostzones[threadIdx][colourIdx].first; + int zoneCellCount = leafcostzones[threadIdx][colourIdx].second; + + if( 0 < zoneCellCount) { + while(startIdx != it.getCurrentGlobalIndex()) { + it.moveRight(); + } + } + + LeafData leafdata; + while( zoneCellCount > 0) { + if( FCoordColour::coord2colour(it.getCurrentCell()->getCoordinate()) == colourIdx) { + + leafdata.index = it.getCurrentGlobalIndex(); + leafdata.cell = it.getCurrentCell(); + leafdata.targets = it.getCurrentListTargets(); + leafdata.sources = it.getCurrentListSrc(); + + if( l2pEnabled ) { + myThreadkernel.L2P(leafdata.cell, leafdata.targets); + } + if( p2pEnabled ){ + // need the current particles and neighbours particles + + const int counter = + tree->getLeafsNeighbors(neighbours, + leafdata.cell->getCoordinate(), + leafLevel); + + myThreadkernel.P2P(leafdata.cell->getCoordinate(), + leafdata.targets, + leafdata.sources, + neighbours, + counter); + } + + zoneCellCount--; + } + + it.moveRight(); + } + + #pragma omp barrier + } + + } + + + + + FLOG( FLog::Controller << "\tFinished (@Direct Pass (L2P + P2P) = " << counterTime.tacAndElapsed() << "s)\n" ); + FLOG( FLog::Controller << "\t\t Computation L2P + P2P : " << computationCounter.cumulated() << " s\n" ); + FLOG( 
FLog::Controller << "\t\t Computation P2P : " << computationCounterP2P.cumulated() << " s\n" ); + + } + +}; + + +#endif //FFMMALGORITHMTHREADBALANCED_HPP diff --git a/Src/Components/FTypedLeaf.hpp b/Src/Components/FTypedLeaf.hpp index c590d7f3ae4003e3e0890e9635594d03d94509fd..f29332e5f5092c7a993e5cd2f692e046d6a9f0a3 100644 --- a/Src/Components/FTypedLeaf.hpp +++ b/Src/Components/FTypedLeaf.hpp @@ -4,13 +4,13 @@ // This software is a computer program whose purpose is to compute the FMM. // // This software is governed by the CeCILL-C and LGPL licenses and -// abiding by the rules of distribution of free software. -// +// abiding by the rules of distribution of free software. +// // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public and CeCILL-C Licenses for more details. -// "http://www.cecill.info". +// "http://www.cecill.info". // "http://www.gnu.org/licenses". // =================================================================================== #ifndef FTYPEDLEAF_HPP @@ -55,6 +55,20 @@ public: else sources.push(inParticlePosition, FParticleTypeSource, args...); } + /** + * To add a new particle in the leaf + * @param inParticlePosition the position of the new particle + * @param isTarget bool to know if it is a target + * followed by other param given by the user + */ + template<typename... Args> + void push(const FPoint<FReal>& inParticlePosition, Args ... 
args){ + FAssert(0,"Error : cannot push a particle without specifying type (src/tgt)"); + } + + + + /** * To get all the sources in a leaf * @return a pointer to the list of particles that are sources @@ -75,5 +89,3 @@ public: #endif //FTYPEDLEAF_HPP - - diff --git a/Src/Containers/FBoolArray.hpp b/Src/Containers/FBoolArray.hpp index 0250f07faee3dbeb92eb04d34374ef6048bf2682..1dd9168f6e861b368beca339d66dfe80f11331e0 100644 --- a/Src/Containers/FBoolArray.hpp +++ b/Src/Containers/FBoolArray.hpp @@ -16,7 +16,8 @@ #ifndef FBOOLARRAY_HPP #define FBOOLARRAY_HPP - +#include "../Utils/FGlobal.hpp" +#include "../Utils/FAssert.hpp" // To get memcpy #include <cstring> @@ -31,29 +32,29 @@ */ class FBoolArray{ /** Size of a unsigned long */ - const static int BytesInBlock = sizeof(unsigned long); - const static int SizeOfBlock = BytesInBlock * 8; + const static FSize BytesInBlock = sizeof(unsigned long); + const static FSize SizeOfBlock = BytesInBlock * 8; /** The array to store bits */ - unsigned long* const array; + unsigned long* array; /** Size of the memory allocated */ - const int memSize; + FSize memSize; /** Size of the array => number of real elements */ - const int size; + FSize size; /** get size to number of long */ - int LongFromSize(const int inSize){ + FSize LongFromSize(const FSize inSize){ return ((inSize + SizeOfBlock - 1) / SizeOfBlock); } /** Alloc an array */ - unsigned long * AllocArray(const int inSize){ + unsigned long * AllocArray(const FSize inSize){ return new unsigned long[LongFromSize(inSize)]; } public : /** Constructor with size */ - FBoolArray(const int inSize) : array(AllocArray(inSize)), memSize(LongFromSize(inSize)*BytesInBlock), size(inSize) { + explicit FBoolArray(const FSize inSize = 0) : array(AllocArray(inSize)), memSize(LongFromSize(inSize)*BytesInBlock), size(inSize) { setToZeros(); } @@ -62,6 +63,25 @@ public : *this = other; } + /** Move the data */ + FBoolArray(FBoolArray&& other): array(nullptr), memSize(0), size(0){ + array = 
other.array; + memSize = other.memSize; + size = other.size; + other.array = nullptr; + other.memSize = 0; + other.size = 0; + } + + /** remove all values and allocate new array */ + void reset(const FSize inSize){ + delete [] array; + array = (AllocArray(inSize)); + memSize = (LongFromSize(inSize)*BytesInBlock); + size = (inSize); + setToZeros(); + } + /** Destructor */ ~FBoolArray(){ delete [] array; @@ -72,10 +92,25 @@ public : * Array must have the same size */ FBoolArray& operator=(const FBoolArray& other){ + FAssertLF(size == other.size); memcpy(array, other.array, memSize); return *this; } + /** + * Move the data from one array to the other + */ + FBoolArray& operator=(FBoolArray&& other){ + delete [] array; + array = other.array; + memSize = other.memSize; + size = other.size; + other.array = nullptr; + other.memSize = 0; + other.size = 0; + return *this; + } + /** * Operator == * Array must have the same size @@ -93,22 +128,22 @@ public : } /** To get a value */ - bool get(const int inPos) const { - const int posInArray = inPos / SizeOfBlock; - const int bytePosition = inPos - (posInArray * 8); + bool get(const FSize inPos) const { + const FSize posInArray = inPos / SizeOfBlock; + const FSize bytePosition = inPos - (posInArray * 8); return (array[posInArray] >> bytePosition) & 1; } /** To set a value */ - void set(const int inPos, const bool inVal){ - const int posInArray = inPos / SizeOfBlock; - const int bytePosition = inPos - (posInArray * 8); + void set(const FSize inPos, const bool inVal){ + const FSize posInArray = inPos / SizeOfBlock; + const FSize bytePosition = inPos - (posInArray * 8); if(inVal) array[posInArray] |= (1UL << bytePosition); else array[posInArray] &= ~(1UL << bytePosition); } /** To get the size of the array */ - int getSize() const { + FSize getSize() const { return size; } @@ -116,6 +151,11 @@ public : void setToZeros() const { memset( array, 0, memSize); } + + /** Set all the memory to 1 */ + void setToOnes() const { + memset( 
array, (unsigned char)0xFF, memSize); + } }; diff --git a/Src/Containers/FOctree.hpp b/Src/Containers/FOctree.hpp index 607e3b1f7af84bfccf4781bd4f1423a7e0dd61b0..2ff347f2b34c166f70e7d5a90bafc6da18c5ba57 100644 --- a/Src/Containers/FOctree.hpp +++ b/Src/Containers/FOctree.hpp @@ -808,8 +808,8 @@ public: * @return the number of neighbors */ int getInteractionNeighbors(const CellClass* inNeighbors[343], - const FTreeCoordinate& workingCell, - const int inLevel) const{ + const FTreeCoordinate& workingCell, + const int inLevel, const int neighSeparation = 1) const{ // reset memset(inNeighbors, 0, sizeof(CellClass*) * 343); @@ -832,7 +832,7 @@ public: if(!FMath::Between(parentCell.getZ() + idxZ,0,boxLimite)) continue; // if we are not on the current cell - if( idxX || idxY || idxZ ){ + if( neighSeparation<1 || idxX || idxY || idxZ ){ const FTreeCoordinate otherParent(parentCell.getX() + idxX,parentCell.getY() + idxY,parentCell.getZ() + idxZ); const MortonIndex mortonOtherParent = otherParent.getMortonIndex(inLevel-1) << 3; // Get child @@ -848,7 +848,7 @@ public: const int zdiff = ((otherParent.getZ()<<1) | (idxCousin&1)) - workingCell.getZ(); // Test if it is a direct neighbor - if(FMath::Abs(xdiff) > 1 || FMath::Abs(ydiff) > 1 || FMath::Abs(zdiff) > 1){ + if(FMath::Abs(xdiff) > neighSeparation || FMath::Abs(ydiff) > neighSeparation || FMath::Abs(zdiff) > neighSeparation){ // add to neighbors inNeighbors[ (((xdiff+3) * 7) + (ydiff+3)) * 7 + zdiff + 3] = cells[idxCousin]; ++idxNeighbors; @@ -875,8 +875,8 @@ public: * @return the number of neighbors */ int getFullNeighborhood(const CellClass* inNeighbors[343], - const FTreeCoordinate& workingCell, - const int inLevel) const{ + const FTreeCoordinate& workingCell, + const int inLevel) const{ // reset memset(inNeighbors, 0, sizeof(CellClass*) * 343); @@ -935,10 +935,8 @@ public: * @return the number of neighbors */ int getPeriodicInteractionNeighbors(const CellClass* inNeighbors[343], - const FTreeCoordinate& workingCell, - 
const int inLevel, const int inDirection) const{ - // TODO : REMOVE NEXT COMMENTS - // std::cout << " Begin in getPeriodicInteractionNeighbors"<<std::endl; + const FTreeCoordinate& workingCell, + const int inLevel, const int inDirection, const int neighSeparation = 1) const{ // Then take each child of the parent's neighbors if not in directNeighbors // Father coordinate @@ -963,18 +961,14 @@ public: const int endY = (TestPeriodicCondition(inDirection, DirPlusY) || parentCell.getY() != boxLimite - 1 ?1:0); const int startZ = (TestPeriodicCondition(inDirection, DirMinusZ) || parentCell.getZ() != 0 ?-1:0); const int endZ = (TestPeriodicCondition(inDirection, DirPlusZ) || parentCell.getZ() != boxLimite - 1 ?1:0); - // TODO : REMOVE NEXT COMMENTS - // std::cout << " -- startX " << startX << " endX "<< endX<< std::endl ; - // std::cout << " -- startY " << startY << " endX "<< endY<< std::endl ; - // std::cout << " -- startZ " << startZ << " endX "<< endZ<< std::endl ; - // std::cout << " boxLimite "<< boxLimite<<std::endl; + int idxNeighbors = 0; // We test all cells around for(int idxX = startX ; idxX <= endX ; ++idxX){ for(int idxY = startY ; idxY <= endY ; ++idxY){ for(int idxZ = startZ ; idxZ <= endZ ; ++idxZ){ // if we are not on the current cell - if( idxX || idxY || idxZ ){ + if(neighSeparation<1 || idxX || idxY || idxZ ){ const FTreeCoordinate otherParent(parentCell.getX() + idxX,parentCell.getY() + idxY,parentCell.getZ() + idxZ); FTreeCoordinate otherParentInBox(otherParent); @@ -1018,15 +1012,10 @@ public: const int zdiff = ((otherParent.getZ()<<1) | (idxCousin&1)) - workingCell.getZ(); // Test if it is a direct neighbor - if(FMath::Abs(xdiff) > 1 || FMath::Abs(ydiff) > 1 || FMath::Abs(zdiff) > 1){ + if(FMath::Abs(xdiff) > neighSeparation || FMath::Abs(ydiff) > neighSeparation || FMath::Abs(zdiff) > neighSeparation){ // add to neighbors - // TODO : REMOVE NEXT COMMENTS - // std::cout << " Voisin numero "<< idxNeighbors - // << " indexinTab "<< (((xdiff+3) * 7) 
+ (ydiff+3)) * 7 + zdiff + 3 - // << " idxXousin " << idxCousin<< std::endl; inNeighbors[ (((xdiff+3) * 7) + (ydiff+3)) * 7 + zdiff + 3] = cells[idxCousin]; ++idxNeighbors; - } } } diff --git a/Src/Containers/FTreeCoordinate.hpp b/Src/Containers/FTreeCoordinate.hpp index f84fc8e1e429cb237b07453016cfbaf29fa7226e..40d805d27e0a6d22398f7f9bc77305c58c849e96 100644 --- a/Src/Containers/FTreeCoordinate.hpp +++ b/Src/Containers/FTreeCoordinate.hpp @@ -342,7 +342,8 @@ public: return idxNeig; } - int getInteractionNeighbors(const int inLevel, MortonIndex inNeighbors[189], int inNeighborsPosition[189]) const{ + int getInteractionNeighbors(const int inLevel, MortonIndex inNeighbors[/*189+26+1*/216], int inNeighborsPosition[/*189+26+1*/216], + const int neighSeparation = 1) const{ // Then take each child of the parent's neighbors if not in directNeighbors // Father coordinate const FTreeCoordinate parentCell(this->getX()>>1,this->getY()>>1,this->getZ()>>1); @@ -362,7 +363,7 @@ public: if(!FMath::Between(parentCell.getZ() + idxZ,0,limite)) continue; // if we are not on the current cell - if( idxX || idxY || idxZ ){ + if(neighSeparation<1 || idxX || idxY || idxZ ){ const FTreeCoordinate otherParent(parentCell.getX() + idxX,parentCell.getY() + idxY,parentCell.getZ() + idxZ); const MortonIndex mortonOther = otherParent.getMortonIndex(inLevel-1); @@ -373,7 +374,7 @@ public: const int zdiff = ((otherParent.getZ()<<1) | (idxCousin&1)) - this->getZ(); // Test if it is a direct neighbor - if(FMath::Abs(xdiff) > 1 || FMath::Abs(ydiff) > 1 || FMath::Abs(zdiff) > 1){ + if(FMath::Abs(xdiff) > neighSeparation || FMath::Abs(ydiff) > neighSeparation || FMath::Abs(zdiff) > neighSeparation){ // add to neighbors inNeighborsPosition[idxNeighbors] = ((( (xdiff+3) * 7) + (ydiff+3))) * 7 + zdiff + 3; inNeighbors[idxNeighbors++] = (mortonOther << 3) | idxCousin; @@ -387,7 +388,7 @@ public: return idxNeighbors; } - int getInteractionNeighbors(const int inLevel, MortonIndex inNeighbors[189]) const{ + 
int getInteractionNeighbors(const int inLevel, MortonIndex inNeighbors[/*189+26+1*/216], const int neighSeparation = 1) const{ // Then take each child of the parent's neighbors if not in directNeighbors // Father coordinate const FTreeCoordinate parentCell(this->getX()>>1,this->getY()>>1,this->getZ()>>1); @@ -407,7 +408,7 @@ public: if(!FMath::Between(parentCell.getZ() + idxZ,0,limite)) continue; // if we are not on the current cell - if( idxX || idxY || idxZ ){ + if(neighSeparation<1 || idxX || idxY || idxZ ){ const FTreeCoordinate otherParent(parentCell.getX() + idxX,parentCell.getY() + idxY,parentCell.getZ() + idxZ); const MortonIndex mortonOther = otherParent.getMortonIndex(inLevel-1); @@ -418,7 +419,7 @@ public: const int zdiff = ((otherParent.getZ()<<1) | (idxCousin&1)) - this->getZ(); // Test if it is a direct neighbor - if(FMath::Abs(xdiff) > 1 || FMath::Abs(ydiff) > 1 || FMath::Abs(zdiff) > 1){ + if(FMath::Abs(xdiff) > neighSeparation || FMath::Abs(ydiff) > neighSeparation || FMath::Abs(zdiff) > neighSeparation){ // add to neighbors inNeighbors[idxNeighbors++] = (mortonOther << 3) | idxCousin; } diff --git a/Src/Core/FFmmAlgorithm.hpp b/Src/Core/FFmmAlgorithm.hpp index 5526e19afe7f30b9308aa2151def7c49a65d5aac..4804b459dbdca726f5f8b0133e1acab2af4abba3 100644 --- a/Src/Core/FFmmAlgorithm.hpp +++ b/Src/Core/FFmmAlgorithm.hpp @@ -47,6 +47,7 @@ class FFmmAlgorithm : public FAbstractAlgorithm, public FAlgorithmTimers { const int OctreeHeight; ///< The height of the given tree. + const int leafLevelSeparationCriteria; public: /** Class constructor * @@ -56,8 +57,8 @@ public: * * \except An exception is thrown if one of the arguments is NULL. 
*/ - FFmmAlgorithm(OctreeClass* const inTree, KernelClass* const inKernels) - : tree(inTree) , kernels(inKernels), OctreeHeight(tree->getHeight()) { + FFmmAlgorithm(OctreeClass* const inTree, KernelClass* const inKernels, const int inLeafLevelSeparationCriteria = 1) + : tree(inTree) , kernels(inKernels), OctreeHeight(tree->getHeight()), leafLevelSeparationCriteria(inLeafLevelSeparationCriteria) { FAssertLF(tree, "tree cannot be null"); FAssertLF(kernels, "kernels cannot be null"); @@ -194,9 +195,11 @@ protected: for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){ FLOG(FTic counterTimeLevel); + const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeparationCriteria); + // for each cells do{ - const int counter = tree->getInteractionNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(), idxLevel); + const int counter = tree->getInteractionNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(), idxLevel, separationCriteria); FLOG(computationCounter.tic()); if(counter) kernels->M2L( octreeIterator.getCurrentCell() , neighbors, counter, idxLevel); FLOG(computationCounter.tac()); diff --git a/Src/Core/FFmmAlgorithmPeriodic.hpp b/Src/Core/FFmmAlgorithmPeriodic.hpp index e82dfce6adef62ec20245060705f0b71df472aff..bdd5f8cca3a34931a991c33d788369fbcb957947 100644 --- a/Src/Core/FFmmAlgorithmPeriodic.hpp +++ b/Src/Core/FFmmAlgorithmPeriodic.hpp @@ -51,6 +51,7 @@ class FFmmAlgorithmPeriodic : public FAbstractAlgorithm{ const int nbLevelsAboveRoot; //< The nb of level the user ask to go above the tree (>= -1) const int offsetRealTree; //< nbLevelsAboveRoot GetFackLevel + const int leafLevelSeperationCriteria; public: /** The constructor need the octree and the kernels used for computation @@ -60,9 +61,9 @@ public: * @param inUpperLevel this parameter defines the behavior of the periodicity refer to the main doc * */ - 
FFmmAlgorithmPeriodic(OctreeClass* const inTree, const int inUpperLevel = 0) + FFmmAlgorithmPeriodic(OctreeClass* const inTree, const int inUpperLevel = 0, const int inLeafLevelSeperationCriteria = 1) : tree(inTree) , kernels(nullptr), OctreeHeight(tree->getHeight()), - nbLevelsAboveRoot(inUpperLevel), offsetRealTree(inUpperLevel + 3) { + nbLevelsAboveRoot(inUpperLevel), offsetRealTree(inUpperLevel + 3), leafLevelSeperationCriteria(inLeafLevelSeperationCriteria) { FAssertLF(tree, "tree cannot be null"); FAssertLF(-1 <= inUpperLevel, "inUpperLevel cannot be < -1"); @@ -254,9 +255,10 @@ protected: for(int idxLevel = 1 ; idxLevel < OctreeHeight ; ++idxLevel ){ FLOG(FTic counterTimeLevel); const int fackLevel = idxLevel + offsetRealTree; + const int separationCriteria = (idxLevel != OctreeHeight-1 ? 1 : leafLevelSeperationCriteria); // for each cells do{ - const int counter = tree->getPeriodicInteractionNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(), idxLevel, AllDirs); + const int counter = tree->getPeriodicInteractionNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(), idxLevel, AllDirs, separationCriteria); FLOG(computationCounter.tic()); if(counter) kernels->M2L( octreeIterator.getCurrentCell() , neighbors, counter, fackLevel); FLOG(computationCounter.tac()); diff --git a/Src/Core/FFmmAlgorithmSectionTask.hpp b/Src/Core/FFmmAlgorithmSectionTask.hpp index 0a797b499044d8713e2ecb3caa34a3075fae4f64..0544d84814234404448f74643282892ea68e2921 100644 --- a/Src/Core/FFmmAlgorithmSectionTask.hpp +++ b/Src/Core/FFmmAlgorithmSectionTask.hpp @@ -49,15 +49,16 @@ class FFmmAlgorithmSectionTask : public FAbstractAlgorithm{ const int OctreeHeight; + const int leafLevelSeperationCriteria; public: /** The constructor need the octree and the kernels used for computation * @param inTree the octree to work on * @param inKernels the kernels to call * An assert is launched if one of the arguments is null */ - FFmmAlgorithmSectionTask(OctreeClass* const 
inTree, KernelClass* const inKernels) + FFmmAlgorithmSectionTask(OctreeClass* const inTree, KernelClass* const inKernels, const int inLeafLevelSeperationCriteria = 1) : tree(inTree) , kernels(0), - MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()) + MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()), leafLevelSeperationCriteria(inLeafLevelSeperationCriteria) { FAssertLF(tree, "tree cannot be null"); @@ -214,9 +215,10 @@ protected: // for each levels for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){ FLOG(FTic counterTimeLevel); + const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria); // for each cells do{ - int counter = tree->getInteractionNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(), idxLevel); + const int counter = tree->getInteractionNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(), idxLevel, separationCriteria); if(counter){ #pragma omp task firstprivate(octreeIterator, neighbors, counter) shared(idxLevel) { diff --git a/Src/Core/FFmmAlgorithmTask.hpp b/Src/Core/FFmmAlgorithmTask.hpp index 010c30a85b36e37ee0206aeac5d8faef135c60f5..84cd2c4c8e9cb57e4e3a63adbe88ed40433b40d0 100644 --- a/Src/Core/FFmmAlgorithmTask.hpp +++ b/Src/Core/FFmmAlgorithmTask.hpp @@ -40,7 +40,7 @@ * Of course this class does not deallocate pointer given in arguements. 
*/ template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass> -class FFmmAlgorithmTask : public FAbstractAlgorithm{ +class FFmmAlgorithmTask : public FAbstractAlgorithm, public FAlgorithmTimers { OctreeClass* const tree; //< The octree to work on KernelClass** kernels; //< The kernels @@ -49,15 +49,16 @@ class FFmmAlgorithmTask : public FAbstractAlgorithm{ const int OctreeHeight; + const int leafLevelSeperationCriteria; public: /** The constructor need the octree and the kernels used for computation * @param inTree the octree to work on * @param inKernels the kernels to call * An assert is launched if one of the arguments is null */ - FFmmAlgorithmTask(OctreeClass* const inTree, KernelClass* const inKernels) + FFmmAlgorithmTask(OctreeClass* const inTree, KernelClass* const inKernels, const int inLeafLevelSeperationCriteria = 1) : tree(inTree) , kernels(nullptr), - MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()) + MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()), leafLevelSeperationCriteria(inLeafLevelSeperationCriteria) { FAssertLF(tree, "tree cannot be null"); @@ -92,15 +93,30 @@ protected: */ void executeCore(const unsigned operationsToProceed) override { - if(operationsToProceed & FFmmP2M) bottomPass(); - - if(operationsToProceed & FFmmM2M) upwardPass(); - - if(operationsToProceed & FFmmM2L) transferPass(); - - if(operationsToProceed & FFmmL2L) downardPass(); - - if((operationsToProceed & FFmmP2P) || (operationsToProceed & FFmmL2P)) directPass((operationsToProceed & FFmmP2P),(operationsToProceed & FFmmL2P)); + Timers[P2MTimer].tic(); + if(operationsToProceed & FFmmP2M) + bottomPass(); + Timers[P2MTimer].tac(); + + Timers[M2MTimer].tic(); + if(operationsToProceed & FFmmM2M) + upwardPass(); + Timers[M2MTimer].tac(); + + Timers[M2LTimer].tic(); + if(operationsToProceed & FFmmM2L) + transferPass(); + Timers[M2LTimer].tac(); + + Timers[L2LTimer].tic(); + if(operationsToProceed & FFmmL2L) 
+ downardPass(); + Timers[L2LTimer].tac(); + + Timers[NearTimer].tic(); + if( (operationsToProceed & FFmmP2P) || (operationsToProceed & FFmmL2P) ) + directPass((operationsToProceed & FFmmP2P),(operationsToProceed & FFmmL2P)); + Timers[NearTimer].tac(); } ///////////////////////////////////////////////////////////////////////////// @@ -213,9 +229,10 @@ protected: // for each levels for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){ FLOG(FTic counterTimeLevel); + const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria); // for each cells do{ - int counter = tree->getInteractionNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(), idxLevel); + const int counter = tree->getInteractionNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(), idxLevel, separationCriteria); if(counter){ #pragma omp task firstprivate(octreeIterator, neighbors, counter) shared(idxLevel) { diff --git a/Src/Core/FFmmAlgorithmThread.hpp b/Src/Core/FFmmAlgorithmThread.hpp index e6701902407da46ace0557e76056780c757e722d..5f850aaafa045991f725a96d998de61f1baebcb1 100644 --- a/Src/Core/FFmmAlgorithmThread.hpp +++ b/Src/Core/FFmmAlgorithmThread.hpp @@ -60,18 +60,35 @@ class FFmmAlgorithmThread : public FAbstractAlgorithm, public FAlgorithmTimers{ const int OctreeHeight; ///< The height of the given tree. + const bool staticSchedule; + + const int leafLevelSeperationCriteria; + + template <class NumType> + NumType getChunkSize(const NumType inSize) const { + if(staticSchedule){ + return FMath::Max(NumType(1) , NumType(double(inSize)/double(omp_get_max_threads())) ); + } + else{ + return FMath::Max(NumType(1) , inSize/NumType(omp_get_max_threads()*omp_get_max_threads())); + } + } + public: /** Class constructor * * The constructor needs the octree and the kernels used for computation. * \param inTree the octree to work on. 
* \param inKernels the kernels to call. + * \param inStaticSchedule Whether to use static or dynamic OpenMP scheduling. * * \except An exception is thrown if one of the arguments is NULL. */ - FFmmAlgorithmThread(OctreeClass* const inTree, KernelClass* const inKernels) + FFmmAlgorithmThread(OctreeClass* const inTree, KernelClass* const inKernels, + const bool inStaticSchedule = true, const int inLeafLevelSeperationCriteria = 1) : tree(inTree) , kernels(nullptr), iterArray(nullptr), leafsNumber(0), - MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()) { + MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()), + staticSchedule(inStaticSchedule), leafLevelSeperationCriteria(inLeafLevelSeperationCriteria) { FAssertLF(tree, "tree cannot be null"); @@ -87,6 +104,7 @@ public: FAbstractAlgorithm::setNbLevelsInTree(tree->getHeight()); FLOG(FLog::Controller << "FFmmAlgorithmThread (Max Thread " << omp_get_max_threads() << ")\n"); + FLOG(FLog::Controller << "\t static schedule " << (staticSchedule?"TRUE":"FALSE") << ")\n"); } /** Default destructor */ @@ -162,7 +180,7 @@ protected: ++leafs; } while(octreeIterator.moveRight()); - const int chunkSize = FMath::Max(1 , leafs/(omp_get_max_threads()*omp_get_max_threads())); + const int chunkSize = getChunkSize(leafs); FLOG(FTic computationCounter); #pragma omp parallel @@ -215,7 +233,7 @@ protected: avoidGotoLeftIterator.moveUp(); octreeIterator = avoidGotoLeftIterator;// equal octreeIterator.moveUp(); octreeIterator.gotoLeft(); - const int chunkSize = FMath::Max(1 , numberOfCells/(omp_get_max_threads()*omp_get_max_threads())); + const int chunkSize = getChunkSize(numberOfCells); FLOG(computationCounter.tic()); #pragma omp parallel @@ -262,6 +280,7 @@ protected: // for each levels for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){ FLOG(FTic counterTimeLevel); + const int separationCriteria = (idxLevel != 
FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria); int numberOfCells = 0; // for each cells do{ @@ -271,7 +290,7 @@ protected: avoidGotoLeftIterator.moveDown(); octreeIterator = avoidGotoLeftIterator; - const int chunkSize = FMath::Max(1 , numberOfCells/(omp_get_max_threads()*omp_get_max_threads())); + const int chunkSize = getChunkSize(numberOfCells); FLOG(computationCounter.tic()); #pragma omp parallel @@ -281,7 +300,7 @@ protected: #pragma omp for schedule(dynamic, chunkSize) nowait for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){ - const int counter = tree->getInteractionNeighbors(neighbors, iterArray[idxCell].getCurrentGlobalCoordinate(),idxLevel); + const int counter = tree->getInteractionNeighbors(neighbors, iterArray[idxCell].getCurrentGlobalCoordinate(), idxLevel, separationCriteria); if(counter) myThreadkernels->M2L( iterArray[idxCell].getCurrentCell() , neighbors, counter, idxLevel); } @@ -329,7 +348,7 @@ protected: octreeIterator = avoidGotoLeftIterator; FLOG(computationCounter.tic()); - const int chunkSize = FMath::Max(1 , numberOfCells/(omp_get_max_threads()*omp_get_max_threads())); + const int chunkSize = getChunkSize(numberOfCells); #pragma omp parallel { KernelClass * const myThreadkernels = kernels[omp_get_thread_num()]; @@ -429,7 +448,7 @@ protected: for(int idxShape = 0 ; idxShape < SizeShape ; ++idxShape){ const int endAtThisShape = this->shapeLeaf[idxShape] + previous; - const int chunkSize = FMath::Max(1 , endAtThisShape/(omp_get_num_threads()*omp_get_num_threads())); + const int chunkSize = getChunkSize(endAtThisShape); #pragma omp for schedule(dynamic, chunkSize) for(int idxLeafs = previous ; idxLeafs < endAtThisShape ; ++idxLeafs){ LeafData& currentIter = leafsDataArray[idxLeafs]; diff --git a/Src/Core/FFmmAlgorithmThreadProc.hpp b/Src/Core/FFmmAlgorithmThreadProc.hpp index c96c035f2584e25bde2235fdc12d6e18214f39e6..2115736ace96d97dff2070ba412594951c826fc9 100644 --- a/Src/Core/FFmmAlgorithmThreadProc.hpp 
+++ b/Src/Core/FFmmAlgorithmThreadProc.hpp @@ -81,6 +81,7 @@ class FFmmAlgorithmThreadProc : public FAbstractAlgorithm, public FAlgorithmTime const int OctreeHeight; //<Height of the tree + const int leafLevelSeperationCriteria; /** An interval is the morton index interval * that a proc use (it holds data in this interval) @@ -134,10 +135,12 @@ public: * @param inKernels the kernels to call * An assert is launched if one of the arguments is null */ - FFmmAlgorithmThreadProc(const FMpi::FComm& inComm, OctreeClass* const inTree, KernelClass* const inKernels) + FFmmAlgorithmThreadProc(const FMpi::FComm& inComm, OctreeClass* const inTree, KernelClass* const inKernels, const int inLeafLevelSeperationCriteria = 1) : tree(inTree) , kernels(nullptr), comm(inComm), iterArray(nullptr),iterArrayComm(nullptr),numberOfLeafs(0), MaxThreads(omp_get_max_threads()), nbProcess(inComm.processCount()), idProcess(inComm.processId()), - OctreeHeight(tree->getHeight()),intervals(new Interval[inComm.processCount()]), + OctreeHeight(tree->getHeight()), + leafLevelSeperationCriteria(inLeafLevelSeperationCriteria), + intervals(new Interval[inComm.processCount()]), workingIntervalsPerLevel(new Interval[inComm.processCount() * tree->getHeight()]) { FAssertLF(tree, "tree cannot be null"); @@ -622,6 +625,9 @@ protected: typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator); // for each levels for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){ + + const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 
1 : leafLevelSeperationCriteria); + if(!procHasWorkAtLevel(idxLevel, idProcess)){ avoidGotoLeftIterator.moveDown(); octreeIterator = avoidGotoLeftIterator; @@ -646,10 +652,10 @@ protected: // Which cell potentialy needs other data and in the same time // are potentialy needed by other - MortonIndex neighborsIndexes[189]; + MortonIndex neighborsIndexes[/*189+26+1*/216]; for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){ // Find the M2L neigbors of a cell - const int counter = iterArrayLocal[idxCell].getCurrentGlobalCoordinate().getInteractionNeighbors(idxLevel, neighborsIndexes); + const int counter = iterArrayLocal[idxCell].getCurrentGlobalCoordinate().getInteractionNeighbors(idxLevel, neighborsIndexes, separationCriteria); memset(alreadySent, false, sizeof(bool) * nbProcess); bool needOther = false; @@ -778,6 +784,8 @@ protected: // Now we can compute all the data // for each levels for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){ + const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 
1 : leafLevelSeperationCriteria); + if(!procHasWorkAtLevel(idxLevel, idProcess)){ avoidGotoLeftIterator.moveDown(); octreeIterator = avoidGotoLeftIterator; @@ -807,7 +815,7 @@ protected: const int nbCellToCompute = FMath::Min(chunckSize, numberOfCells-idxCell); for(int idxCellToCompute = idxCell ; idxCellToCompute < idxCell+nbCellToCompute ; ++idxCellToCompute){ - const int counter = tree->getInteractionNeighbors(neighbors, iterArray[idxCellToCompute].getCurrentGlobalCoordinate(), idxLevel); + const int counter = tree->getInteractionNeighbors(neighbors, iterArray[idxCellToCompute].getCurrentGlobalCoordinate(), idxLevel, separationCriteria); if(counter) myThreadkernels->M2L( iterArray[idxCellToCompute].getCurrentCell() , neighbors, counter, idxLevel); } } @@ -843,6 +851,8 @@ protected: // compute the second time // for each levels for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){ + const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 
1 : leafLevelSeperationCriteria); + if(!procHasWorkAtLevel(idxLevel, idProcess)){ avoidGotoLeftIterator.moveDown(); octreeIterator = avoidGotoLeftIterator; @@ -900,15 +910,15 @@ protected: #pragma omp parallel { KernelClass * const myThreadkernels = kernels[omp_get_thread_num()]; - MortonIndex neighborsIndex[189]; - int neighborsPosition[189]; + MortonIndex neighborsIndex[/*189+26+1*/216]; + int neighborsPosition[/*189+26+1*/216]; const CellClass* neighbors[343]; #pragma omp for schedule(static) nowait for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){ // compute indexes memset(neighbors, 0, 343 * sizeof(CellClass*)); - const int counterNeighbors = iterArray[idxCell].getCurrentGlobalCoordinate().getInteractionNeighbors(idxLevel, neighborsIndex, neighborsPosition); + const int counterNeighbors = iterArray[idxCell].getCurrentGlobalCoordinate().getInteractionNeighbors(idxLevel, neighborsIndex, neighborsPosition, separationCriteria); int counter = 0; // does we receive this index from someone? 
diff --git a/Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp b/Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp index 54605f98e3dc902e74d0c47c2b44b9af756f14f6..e2a0ab116633fcc4e5cb503c9ba97e0f52735eb2 100644 --- a/Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp +++ b/Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp @@ -83,6 +83,8 @@ class FFmmAlgorithmThreadProcPeriodic : public FAbstractAlgorithm { const int OctreeHeight; + const int leafLevelSeperationCriteria; + public: struct Interval{ MortonIndex leftIndex; @@ -139,11 +141,13 @@ public: * An assert is launched if one of the arguments is null */ FFmmAlgorithmThreadProcPeriodic(const FMpi::FComm& inComm, OctreeClass* const inTree, - const int inUpperLevel = 2) + const int inUpperLevel = 2, const int inLeafLevelSeperationCriteria = 1) : tree(inTree) , kernels(nullptr), comm(inComm), nbLevelsAboveRoot(inUpperLevel), offsetRealTree(inUpperLevel + 3), numberOfLeafs(0), MaxThreads(omp_get_max_threads()), nbProcess(inComm.processCount()), idProcess(inComm.processId()), - OctreeHeight(tree->getHeight()),intervals(new Interval[inComm.processCount()]), + OctreeHeight(tree->getHeight()), + leafLevelSeperationCriteria(inLeafLevelSeperationCriteria), + intervals(new Interval[inComm.processCount()]), workingIntervalsPerLevel(new Interval[inComm.processCount() * tree->getHeight()]) { FAssertLF(tree, "tree cannot be null"); @@ -752,6 +756,7 @@ protected: typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator); // for each levels for(int idxLevel = 1 ; idxLevel < OctreeHeight ; ++idxLevel ){ + if(!procHasWorkAtLevel(idxLevel, idProcess)){ avoidGotoLeftIterator.moveDown(); octreeIterator = avoidGotoLeftIterator; @@ -776,13 +781,13 @@ protected: // Which cell potentialy needs other data and in the same time // are potentialy needed by other - int neighborsPosition[189]; - MortonIndex neighborsIndexes[189]; + int neighborsPosition[/*189+26+1*/216]; + MortonIndex neighborsIndexes[/*189+26+1*/216]; for(int idxCell = 0 ; idxCell < 
numberOfCells ; ++idxCell){ // Find the M2L neigbors of a cell const int counter = getPeriodicInteractionNeighbors(iterArray[idxCell].getCurrentGlobalCoordinate(), idxLevel, - neighborsIndexes, neighborsPosition, AllDirs); + neighborsIndexes, neighborsPosition, AllDirs, (idxLevel != OctreeHeight-1 ? 1 : leafLevelSeperationCriteria)); memset(alreadySent, false, sizeof(bool) * nbProcess); bool needOther = false; @@ -907,6 +912,9 @@ protected: // for each levels for(int idxLevel = 1 ; idxLevel < OctreeHeight ; ++idxLevel ){ const int fackLevel = idxLevel + offsetRealTree; + + const int separationCriteria = (idxLevel != OctreeHeight-1 ? 1 : leafLevelSeperationCriteria); + if(!procHasWorkAtLevel(idxLevel, idProcess)){ avoidGotoLeftIterator.moveDown(); octreeIterator = avoidGotoLeftIterator; @@ -936,10 +944,9 @@ protected: const int nbCellToCompute = FMath::Min(chunckSize, numberOfCells-idxCell); for(int idxCellToCompute = idxCell ; idxCellToCompute < idxCell+nbCellToCompute ; ++idxCellToCompute){ - const int counter = tree-> - getPeriodicInteractionNeighbors(neighbors, + const int counter = tree->getPeriodicInteractionNeighbors(neighbors, iterArray[idxCellToCompute].getCurrentGlobalCoordinate(), - idxLevel, AllDirs); + idxLevel, AllDirs, separationCriteria); if(counter) myThreadkernels->M2L( iterArray[idxCellToCompute].getCurrentCell() , neighbors, counter, fackLevel); @@ -973,6 +980,9 @@ protected: // for each levels for(int idxLevel = 1 ; idxLevel < OctreeHeight ; ++idxLevel ){ const int fackLevel = idxLevel + offsetRealTree; + + const int separationCriteria = (idxLevel != OctreeHeight-1 ?
1 : leafLevelSeperationCriteria); + if(!procHasWorkAtLevel(idxLevel, idProcess)){ avoidGotoLeftIterator.moveDown(); octreeIterator = avoidGotoLeftIterator; @@ -1030,15 +1040,15 @@ protected: #pragma omp parallel { KernelClass * const myThreadkernels = kernels[omp_get_thread_num()]; - MortonIndex neighborsIndex[189]; - int neighborsPosition[189]; + MortonIndex neighborsIndex[/*189+26+1*/216]; + int neighborsPosition[/*189+26+1*/216]; const CellClass* neighbors[343]; #pragma omp for schedule(static) nowait for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){ // compute indexes memset(neighbors, 0, 343 * sizeof(CellClass*)); - const int counterNeighbors = getPeriodicInteractionNeighbors(iterArray[idxCell].getCurrentGlobalCoordinate(), idxLevel, neighborsIndex, neighborsPosition, AllDirs); + const int counterNeighbors = getPeriodicInteractionNeighbors(iterArray[idxCell].getCurrentGlobalCoordinate(), idxLevel, neighborsIndex, neighborsPosition, AllDirs, separationCriteria); //const int counterNeighbors = iterArray[idxCell].getCurrentGlobalCoordinate().getInteractionNeighbors(idxLevel, neighborsIndex, neighborsPosition); int counter = 0; @@ -1758,7 +1768,8 @@ protected: } - int getPeriodicInteractionNeighbors(const FTreeCoordinate& workingCell,const int inLevel, MortonIndex inNeighbors[189], int inNeighborsPosition[189], const int inDirection) const{ + int getPeriodicInteractionNeighbors(const FTreeCoordinate& workingCell,const int inLevel, MortonIndex inNeighbors[/*189+26+1*/216], int inNeighborsPosition[/*189+26+1*/216], + const int inDirection, const int neighSeparation) const{ // Then take each child of the parent's neighbors if not in directNeighbors // Father coordinate @@ -1770,7 +1781,7 @@ protected: // This is not on a border we can use normal interaction list method if( !(parentCell.getX() == 0 || parentCell.getY() == 0 || parentCell.getZ() == 0 || parentCell.getX() == boxLimite - 1 || parentCell.getY() == boxLimite - 1 || parentCell.getZ() == boxLimite 
- 1 ) ) { - return getInteractionNeighbors( workingCell, inLevel, inNeighbors, inNeighborsPosition); + return getInteractionNeighbors( workingCell, inLevel, inNeighbors, inNeighborsPosition, neighSeparation); } const int startX = (TestPeriodicCondition(inDirection, DirMinusX) || parentCell.getX() != 0 ?-1:0); @@ -1821,7 +1832,7 @@ protected: const int zdiff = ((otherParent.getZ()<<1) | (idxCousin&1)) - workingCell.getZ(); // Test if it is a direct neighbor - if(FMath::Abs(xdiff) > 1 || FMath::Abs(ydiff) > 1 || FMath::Abs(zdiff) > 1){ + if(FMath::Abs(xdiff) > neighSeparation || FMath::Abs(ydiff) > neighSeparation || FMath::Abs(zdiff) > neighSeparation){ // add to neighbors inNeighborsPosition[idxNeighbors] = (((xdiff+3) * 7) + (ydiff+3)) * 7 + zdiff + 3; inNeighbors[idxNeighbors++] = (mortonOtherParent << 3) | idxCousin; @@ -1835,7 +1846,8 @@ protected: return idxNeighbors; } - int getInteractionNeighbors(const FTreeCoordinate& workingCell,const int inLevel, MortonIndex inNeighbors[189], int inNeighborsPosition[189]) const{ + int getInteractionNeighbors(const FTreeCoordinate& workingCell,const int inLevel, MortonIndex inNeighbors[/*189+26+1*/216], int inNeighborsPosition[/*189+26+1*/216], + const int neighSeparation) const{ // Then take each child of the parent's neighbors if not in directNeighbors // Father coordinate @@ -1860,7 +1872,7 @@ protected: const int zdiff = ((otherParent.getZ()<<1) | (idxCousin&1)) - workingCell.getZ(); // Test if it is a direct neighbor - if(FMath::Abs(xdiff) > 1 || FMath::Abs(ydiff) > 1 || FMath::Abs(zdiff) > 1){ + if(FMath::Abs(xdiff) > neighSeparation || FMath::Abs(ydiff) > neighSeparation || FMath::Abs(zdiff) > neighSeparation){ // add to neighbors inNeighborsPosition[idxNeighbors] = ((( (xdiff+3) * 7) + (ydiff+3))) * 7 + zdiff + 3; inNeighbors[idxNeighbors++] = (mortonOtherParent << 3) | idxCousin; diff --git a/Src/Core/FFmmAlgorithmThreadTsm.hpp b/Src/Core/FFmmAlgorithmThreadTsm.hpp index 
86434c6746c2f7b8e34c28be425be59a0f42fa24..6bb6a268a25c1f58c012b7909d14d61b0121b213 100644 --- a/Src/Core/FFmmAlgorithmThreadTsm.hpp +++ b/Src/Core/FFmmAlgorithmThreadTsm.hpp @@ -55,15 +55,17 @@ class FFmmAlgorithmThreadTsm : public FAbstractAlgorithm, public FAlgorithmTimer const int OctreeHeight; + const int leafLevelSeperationCriteria; + public: /** The constructor need the octree and the kernels used for computation * @param inTree the octree to work on * @param inKernels the kernels to call * An assert is launched if one of the arguments is null */ - FFmmAlgorithmThreadTsm(OctreeClass* const inTree, KernelClass* const inKernels) + FFmmAlgorithmThreadTsm(OctreeClass* const inTree, KernelClass* const inKernels, const int inLeafLevelSeperationCriteria = 1) : tree(inTree) , kernels(nullptr), iterArray(nullptr), - MaxThreads(omp_get_max_threads()) , OctreeHeight(tree->getHeight()) { + MaxThreads(omp_get_max_threads()) , OctreeHeight(tree->getHeight()), leafLevelSeperationCriteria(inLeafLevelSeperationCriteria) { FAssertLF(tree, "tree cannot be null"); @@ -248,6 +250,7 @@ protected: // for each levels for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){ FLOG(FTic counterTimeLevel); + const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 
1 : leafLevelSeperationCriteria); int numberOfCells = 0; // for each cells @@ -270,7 +273,7 @@ protected: for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){ CellClass* const currentCell = iterArray[idxCell].getCurrentCell(); if(currentCell->hasTargetsChild()){ - const int counter = tree->getInteractionNeighbors(neighbors, iterArray[idxCell].getCurrentGlobalCoordinate(),idxLevel); + const int counter = tree->getInteractionNeighbors(neighbors, iterArray[idxCell].getCurrentGlobalCoordinate(), idxLevel, separationCriteria); if( counter ){ int counterWithSrc = 0; for(int idxRealNeighbors = 0 ; idxRealNeighbors < 343 ; ++idxRealNeighbors ){ diff --git a/Src/Core/FFmmAlgorithmTsm.hpp b/Src/Core/FFmmAlgorithmTsm.hpp index 2937a880bd59841a9e18e02e5086ea2c79a7f672..80f238d68465a3ec2905c8892ce30797e6ea322e 100644 --- a/Src/Core/FFmmAlgorithmTsm.hpp +++ b/Src/Core/FFmmAlgorithmTsm.hpp @@ -46,6 +46,8 @@ class FFmmAlgorithmTsm : public FAbstractAlgorithm{ const int OctreeHeight; + const int leafLevelSeperationCriteria; + FLOG(FTic counterTime); //< In case of debug: to count the elapsed time FLOG(FTic computationCounter); //< In case of debug: to count computation time @@ -55,8 +57,8 @@ public: * @param inKernels the kernels to call * An assert is launched if one of the arguments is null */ - FFmmAlgorithmTsm(OctreeClass* const inTree, KernelClass* const inKernels) - : tree(inTree) , kernels(inKernels) , OctreeHeight(tree->getHeight()){ + FFmmAlgorithmTsm(OctreeClass* const inTree, KernelClass* const inKernels, const int inLeafLevelSeperationCriteria = 1) + : tree(inTree) , kernels(inKernels) , OctreeHeight(tree->getHeight()), leafLevelSeperationCriteria(inLeafLevelSeperationCriteria){ FAssertLF(tree, "tree cannot be null"); FAssertLF(kernels, "kernels cannot be null"); @@ -198,13 +200,14 @@ protected: // for each levels for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){ FLOG(FTic 
counterTimeLevel); + const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria); // for each cells do{ FLOG(computationCounter.tic()); CellClass* const currentCell = octreeIterator.getCurrentCell(); if(currentCell->hasTargetsChild()){ - const int counter = tree->getInteractionNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(),idxLevel); + const int counter = tree->getInteractionNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(),idxLevel, separationCriteria); if( counter ){ int counterWithSrc = 0; for(int idxRealNeighbors = 0 ; idxRealNeighbors < 343 ; ++idxRealNeighbors ){ diff --git a/Src/GroupTree/Core/FGroupOfCellsDyn.hpp b/Src/GroupTree/Core/FGroupOfCellsDyn.hpp index a1123d2ee3bfb031a93e2cfbfa2ae586d056bd11..d04e089b7488d3d36a1a512f084e8701f71f317f 100644 --- a/Src/GroupTree/Core/FGroupOfCellsDyn.hpp +++ b/Src/GroupTree/Core/FGroupOfCellsDyn.hpp @@ -177,8 +177,8 @@ public: cellMultipoles = (unsigned char*)FAlignedMemory::AllocateBytes<32>(inNumberOfCells*cellSizes->poleCellClassSize); memset(cellMultipoles, 0, inNumberOfCells*cellSizes->poleCellClassSize); - cellLocals = (unsigned char*)FAlignedMemory::AllocateBytes<32>(inNumberOfCells*cellSizes->poleCellClassSize); - memset(cellLocals, 0, inNumberOfCells*cellSizes->poleCellClassSize); + cellLocals = (unsigned char*)FAlignedMemory::AllocateBytes<32>(inNumberOfCells*cellSizes->localCellClassSize); + memset(cellLocals, 0, inNumberOfCells*cellSizes->localCellClassSize); // Set all index to not used for(int idxCellPtr = 0 ; idxCellPtr < blockIndexesTableSize ; ++idxCellPtr){ @@ -314,15 +314,21 @@ public: } /** Allocate a new cell by calling its constructor */ - template<typename... CellConstructorParams> - void newCell(const MortonIndex inIndex, const int id, CellConstructorParams... 
args){ + void newCell(const MortonIndex inIndex, const int id, + std::function<void(const MortonIndex mindex, + unsigned char* symbBuff, const size_t symbSize, + unsigned char* upBuff, const size_t upSize, + unsigned char* downBuff, const size_t downSize, + const int level)> BuildCellFunc, + const int inLevel){ FAssertLF(isInside(inIndex)); FAssertLF(!exists(inIndex)); FAssertLF(id < blockHeader->blockIndexesTableSize); - CompositeCellClass cell(&blockCells[id*cellSizes->symbCellClassSize], - &cellMultipoles[id*cellSizes->poleCellClassSize], - &cellLocals[id*cellSizes->localCellClassSize]); - cell.init(args...); + BuildCellFunc(inIndex, + &blockCells[id*cellSizes->symbCellClassSize],cellSizes->symbCellClassSize, + &cellMultipoles[id*cellSizes->poleCellClassSize],cellSizes->poleCellClassSize, + &cellLocals[id*cellSizes->localCellClassSize],cellSizes->localCellClassSize, + inLevel); blockIndexesTable[inIndex-blockHeader->startingIndex] = id; } diff --git a/Src/GroupTree/Core/FGroupOfParticlesDyn.hpp b/Src/GroupTree/Core/FGroupOfParticlesDyn.hpp index 3bc30a900c3284548be20bd6e2518a13d07d3c4c..82d6022f246027bfad728d43ce068850b2c4c9f2 100644 --- a/Src/GroupTree/Core/FGroupOfParticlesDyn.hpp +++ b/Src/GroupTree/Core/FGroupOfParticlesDyn.hpp @@ -248,8 +248,8 @@ public: for(int idxLeafPtr = 0 ; idxLeafPtr < blockHeader->blockIndexesTableSize ; ++idxLeafPtr){ if(blockIndexesTable[idxLeafPtr] != LeafIsEmptyFlag){ const int id = blockIndexesTable[idxLeafPtr]; - ParticlesAttachedClass leaf(symbPart + leafHeader[id].offSetSymb, - (downPart?downPart + leafHeader[id].offSetDown : nullptr)); + ParticlesAttachedClass leaf( (leafHeader[id].sizeSymb? 
symbPart + leafHeader[id].offSetSymb : nullptr), + (downPart && leafHeader[id].sizeDown ?downPart + leafHeader[id].offSetDown : nullptr) ); function(&leaf); } } @@ -261,11 +261,33 @@ public: ParticlesAttachedClass getLeaf(const MortonIndex leafIndex){ if(blockIndexesTable[leafIndex - blockHeader->startingIndex] != LeafIsEmptyFlag){ const int id = blockIndexesTable[leafIndex - blockHeader->startingIndex]; - return ParticlesAttachedClass(symbPart + leafHeader[id].offSetSymb, - (downPart?downPart + leafHeader[id].offSetDown : nullptr)); + return ParticlesAttachedClass((leafHeader[id].sizeSymb? symbPart + leafHeader[id].offSetSymb : nullptr), + (downPart && leafHeader[id].sizeDown ?downPart + leafHeader[id].offSetDown : nullptr) ); } return ParticlesAttachedClass(); } + + /** Return the buffer for a leaf or null if it does not exist */ + unsigned char* getLeafSymbBuffer(const MortonIndex leafIndex){ + if(blockIndexesTable[leafIndex - blockHeader->startingIndex] != LeafIsEmptyFlag){ + const int id = blockIndexesTable[leafIndex - blockHeader->startingIndex]; + if(leafHeader[id].sizeSymb){ + return (symbPart + leafHeader[id].offSetSymb); + } + } + return nullptr; + } + + /** Return the buffer for a leaf or null if it does not exist */ + unsigned char* getLeafDownBuffer(const MortonIndex leafIndex){ + if(blockIndexesTable[leafIndex - blockHeader->startingIndex] != LeafIsEmptyFlag){ + const int id = blockIndexesTable[leafIndex - blockHeader->startingIndex]; + if(leafHeader[id].sizeDown){ + return (downPart?downPart + leafHeader[id].offSetDown : nullptr); + } + } + return nullptr; + } }; #endif // FGROUPOFPARTICLESDYN_HPP diff --git a/Src/GroupTree/Core/FGroupSeqAlgorithm.hpp b/Src/GroupTree/Core/FGroupSeqAlgorithm.hpp index e261f4c8996941b1335e8ead8bc56469a162de0f..fadf2a150c795d51c7b5f142a6cb5c65f3c11629 100644 --- a/Src/GroupTree/Core/FGroupSeqAlgorithm.hpp +++ b/Src/GroupTree/Core/FGroupSeqAlgorithm.hpp @@ -17,7 +17,7 @@ #include <vector> template <class OctreeClass, 
class CellContainerClass, class CellClass, class KernelClass, class ParticleGroupClass, class ParticleContainerClass> -class FGroupSeqAlgorithm { +class FGroupSeqAlgorithm : public FAbstractAlgorithm { protected: const int MaxThreads; //< The number of threads OctreeClass*const tree; //< The Tree @@ -28,13 +28,19 @@ public: FAssertLF(tree, "tree cannot be null"); FAssertLF(kernels, "kernels cannot be null"); + FAbstractAlgorithm::setNbLevelsInTree(tree->getHeight()); + FLOG(FLog::Controller << "FGroupSeqAlgorithm (Max Thread " << MaxThreads << ")\n"); } ~FGroupSeqAlgorithm(){ } - void execute(const unsigned operationsToProceed = FFmmNearAndFarFields){ +protected: + /** + * Runs the complete algorithm. + */ + void executeCore(const unsigned operationsToProceed) override { FLOG( FLog::Controller << "\tStart FGroupSeqAlgorithm\n" ); if(operationsToProceed & FFmmP2M) bottomPass(); @@ -45,10 +51,11 @@ public: if(operationsToProceed & FFmmL2L) downardPass(); - if( (operationsToProceed & FFmmP2P) || (operationsToProceed & FFmmL2P) ) directPass(); + if( (operationsToProceed & FFmmP2P) || (operationsToProceed & FFmmL2P) ){ + directPass((operationsToProceed & FFmmP2P), (operationsToProceed & FFmmL2P)); + } } -protected: void bottomPass(){ FLOG( FTic timer; ); typename OctreeClass::ParticleGroupIterator iterParticles = tree->leavesBegin(); @@ -83,7 +90,7 @@ protected: void upwardPass(){ FLOG( FTic timer; ); - for(int idxLevel = tree->getHeight()-2 ; idxLevel >= 2 ; --idxLevel){ + for(int idxLevel = FMath::Min(tree->getHeight() - 2, FAbstractAlgorithm::lowerWorkingLevel - 1) ; idxLevel >= FAbstractAlgorithm::upperWorkingLevel ; --idxLevel){ typename OctreeClass::CellGroupIterator iterCells = tree->cellsBegin(idxLevel); const typename OctreeClass::CellGroupIterator endCells = tree->cellsEnd(idxLevel); @@ -133,7 +140,7 @@ protected: void transferPass(){ FLOG( FTic timer; ); - for(int idxLevel = tree->getHeight()-1 ; idxLevel >= 2 ; --idxLevel){ + for(int idxLevel = 
FAbstractAlgorithm::lowerWorkingLevel-1 ; idxLevel >= FAbstractAlgorithm::upperWorkingLevel ; --idxLevel){ typename OctreeClass::CellGroupIterator iterCells = tree->cellsBegin(idxLevel); const typename OctreeClass::CellGroupIterator endCells = tree->cellsEnd(idxLevel); @@ -238,7 +245,7 @@ protected: void downardPass(){ FLOG( FTic timer; ); - for(int idxLevel = 2 ; idxLevel <= tree->getHeight()-2 ; ++idxLevel){ + for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel - 1 ; ++idxLevel){ typename OctreeClass::CellGroupIterator iterCells = tree->cellsBegin(idxLevel); const typename OctreeClass::CellGroupIterator endCells = tree->cellsEnd(idxLevel); @@ -284,9 +291,9 @@ protected: FLOG( FLog::Controller << "\t\t downardPass in " << timer.tacAndElapsed() << "s\n" ); } - void directPass(){ + void directPass(const bool p2pEnabled, const bool l2pEnabled){ FLOG( FTic timer; ); - { + if(l2pEnabled){ typename OctreeClass::ParticleGroupIterator iterParticles = tree->leavesBegin(); const typename OctreeClass::ParticleGroupIterator endParticles = tree->leavesEnd(); @@ -314,7 +321,7 @@ protected: FAssertLF(iterParticles == endParticles && iterCells == endCells); } - { + if(p2pEnabled){ typename OctreeClass::ParticleGroupIterator iterParticles = tree->leavesBegin(); const typename OctreeClass::ParticleGroupIterator endParticles = tree->leavesEnd(); diff --git a/Src/GroupTree/Core/FGroupTaskAlgorithm.hpp b/Src/GroupTree/Core/FGroupTaskAlgorithm.hpp index cf1b0bcab5696a13f6770999125e98d922eee29c..36cafc45f781d34adf0d1593611bd39fcd31a330 100644 --- a/Src/GroupTree/Core/FGroupTaskAlgorithm.hpp +++ b/Src/GroupTree/Core/FGroupTaskAlgorithm.hpp @@ -19,7 +19,7 @@ #include <omp.h> template <class OctreeClass, class CellContainerClass, class CellClass, class KernelClass, class ParticleGroupClass, class ParticleContainerClass> -class FGroupTaskAlgorithm { +class FGroupTaskAlgorithm : public FAbstractAlgorithm { protected: template <class 
OtherBlockClass> struct BlockInteractions{ @@ -40,6 +40,8 @@ public: FAssertLF(tree, "tree cannot be null"); FAssertLF(inKernels, "kernels cannot be null"); + FAbstractAlgorithm::setNbLevelsInTree(tree->getHeight()); + kernels = new KernelClass*[MaxThreads]; #pragma omp parallel for schedule(static) num_threads(MaxThreads) for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){ @@ -61,7 +63,11 @@ public: delete[] kernels; } - void execute(const unsigned operationsToProceed = FFmmNearAndFarFields){ +protected: + /** + * Runs the complete algorithm. + */ + void executeCore(const unsigned operationsToProceed) override { FLOG( FLog::Controller << "\tStart FGroupTaskAlgorithm\n" ); #pragma omp parallel num_threads(MaxThreads) @@ -101,7 +107,6 @@ public: } } -protected: /** * This function is creating the interactions vector between blocks. * It fills externalInteractionsAllLevel and externalInteractionsLeafLevel. @@ -308,7 +313,7 @@ protected: void upwardPass(){ FLOG( FTic timer; ); - for(int idxLevel = tree->getHeight()-2 ; idxLevel >= 2 ; --idxLevel){ + for(int idxLevel = FMath::Min(tree->getHeight() - 2, FAbstractAlgorithm::lowerWorkingLevel - 1) ; idxLevel >= FAbstractAlgorithm::upperWorkingLevel ; --idxLevel){ typename OctreeClass::CellGroupIterator iterCells = tree->cellsBegin(idxLevel); const typename OctreeClass::CellGroupIterator endCells = tree->cellsEnd(idxLevel); @@ -388,7 +393,7 @@ protected: void transferPass(){ FLOG( FTic timer; ); FLOG( FTic timerInBlock; FTic timerOutBlock; ); - for(int idxLevel = tree->getHeight()-1 ; idxLevel >= 2 ; --idxLevel){ + for(int idxLevel = FAbstractAlgorithm::lowerWorkingLevel-1 ; idxLevel >= FAbstractAlgorithm::upperWorkingLevel ; --idxLevel){ FLOG( timerInBlock.tic() ); { typename OctreeClass::CellGroupIterator iterCells = tree->cellsBegin(idxLevel); @@ -499,7 +504,7 @@ protected: void downardPass(){ FLOG( FTic timer; ); - for(int idxLevel = 2 ; idxLevel <= tree->getHeight()-2 ; ++idxLevel){ + for(int idxLevel = 
FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel - 1 ; ++idxLevel){ typename OctreeClass::CellGroupIterator iterCells = tree->cellsBegin(idxLevel); const typename OctreeClass::CellGroupIterator endCells = tree->cellsEnd(idxLevel); diff --git a/Src/GroupTree/Core/FGroupTaskDepAlgorithm.hpp b/Src/GroupTree/Core/FGroupTaskDepAlgorithm.hpp index a7ebd08cb2634436a6d79ae48cf565e9d7cbee18..80f7241799d9abb39f065bad3e5b4e35bc028254 100644 --- a/Src/GroupTree/Core/FGroupTaskDepAlgorithm.hpp +++ b/Src/GroupTree/Core/FGroupTaskDepAlgorithm.hpp @@ -21,7 +21,7 @@ template <class OctreeClass, class CellContainerClass, class CellClass, class SymboleCellClass, class PoleCellClass, class LocalCellClass, class KernelClass, class ParticleGroupClass, class ParticleContainerClass> -class FGroupTaskDepAlgorithm { +class FGroupTaskDepAlgorithm : public FAbstractAlgorithm { protected: template <class OtherBlockClass> struct BlockInteractions{ @@ -42,6 +42,8 @@ public: FAssertLF(tree, "tree cannot be null"); FAssertLF(inKernels, "kernels cannot be null"); + FAbstractAlgorithm::setNbLevelsInTree(tree->getHeight()); + kernels = new KernelClass*[MaxThreads]; #pragma omp parallel for schedule(static) num_threads(MaxThreads) for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){ @@ -63,7 +65,11 @@ public: delete[] kernels; } - void execute(const unsigned operationsToProceed = FFmmNearAndFarFields){ +protected: + /** + * Runs the complete algorithm. + */ + void executeCore(const unsigned operationsToProceed) override { FLOG( FLog::Controller << "\tStart FGroupTaskDepAlgorithm\n" ); #pragma omp parallel num_threads(MaxThreads) @@ -96,7 +102,7 @@ public: } } -protected: + /** * This function is creating the interactions vector between blocks. * It fills externalInteractionsAllLevel and externalInteractionsLeafLevel. 
@@ -304,7 +310,7 @@ protected: void upwardPass(){ FLOG( FTic timer; ); - for(int idxLevel = tree->getHeight()-2 ; idxLevel >= 2 ; --idxLevel){ + for(int idxLevel = FMath::Min(tree->getHeight() - 2, FAbstractAlgorithm::lowerWorkingLevel - 1) ; idxLevel >= FAbstractAlgorithm::upperWorkingLevel ; --idxLevel){ typename OctreeClass::CellGroupIterator iterCells = tree->cellsBegin(idxLevel); const typename OctreeClass::CellGroupIterator endCells = tree->cellsEnd(idxLevel); @@ -388,7 +394,7 @@ protected: void transferPass(){ FLOG( FTic timer; ); FLOG( FTic timerInBlock; FTic timerOutBlock; ); - for(int idxLevel = tree->getHeight()-1 ; idxLevel >= 2 ; --idxLevel){ + for(int idxLevel = FAbstractAlgorithm::lowerWorkingLevel-1 ; idxLevel >= FAbstractAlgorithm::upperWorkingLevel ; --idxLevel){ FLOG( timerInBlock.tic() ); { typename OctreeClass::CellGroupIterator iterCells = tree->cellsBegin(idxLevel); @@ -502,7 +508,7 @@ protected: void downardPass(){ FLOG( FTic timer; ); - for(int idxLevel = 2 ; idxLevel <= tree->getHeight()-2 ; ++idxLevel){ + for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel - 1 ; ++idxLevel){ typename OctreeClass::CellGroupIterator iterCells = tree->cellsBegin(idxLevel); const typename OctreeClass::CellGroupIterator endCells = tree->cellsEnd(idxLevel); diff --git a/Src/GroupTree/Core/FGroupTaskStarpuAlgorithm.hpp b/Src/GroupTree/Core/FGroupTaskStarpuAlgorithm.hpp index ff31b802f0cec54577fe51839a1f853cf72f3c91..abc6fa653cccae2c7c8431975b904a42331894f9 100644 --- a/Src/GroupTree/Core/FGroupTaskStarpuAlgorithm.hpp +++ b/Src/GroupTree/Core/FGroupTaskStarpuAlgorithm.hpp @@ -48,7 +48,7 @@ template <class OctreeClass, class CellContainerClass, class KernelClass, class , class StarPUOpenClWrapperClass = FStarPUOpenClWrapper<KernelClass, FOpenCLDeviceWrapper<KernelClass>> #endif > -class FGroupTaskStarPUAlgorithm { +class FGroupTaskStarPUAlgorithm : public FAbstractAlgorithm { protected: typedef 
FGroupTaskStarPUAlgorithm<OctreeClass, CellContainerClass, KernelClass, ParticleGroupClass, StarPUCpuWrapperClass #ifdef SCALFMM_ENABLE_CUDA_KERNEL @@ -134,6 +134,8 @@ public: FAssertLF(tree, "tree cannot be null"); FAssertLF(inKernels, "kernels cannot be null"); + FAbstractAlgorithm::setNbLevelsInTree(tree->getHeight()); + struct starpu_conf conf; FAssertLF(starpu_conf_init(&conf) == 0); FStarPUFmmPriorities::Controller().init(&conf, tree->getHeight(), inKernels); @@ -235,7 +237,11 @@ public: starpu_shutdown(); } - void execute(const unsigned operationsToProceed = FFmmNearAndFarFields){ +protected: + /** + * Runs the complete algorithm. + */ + void executeCore(const unsigned operationsToProceed) override { FLOG( FLog::Controller << "\tStart FGroupTaskStarPUAlgorithm\n" ); const bool directOnly = (tree->getHeight() <= 2); @@ -263,7 +269,7 @@ public: starpu_pause(); } -protected: + void initCodelet(){ memset(&p2m_cl, 0, sizeof(p2m_cl)); #ifdef STARPU_USE_CPU @@ -738,7 +744,7 @@ protected: void upwardPass(){ FLOG( FTic timer; ); - for(int idxLevel = tree->getHeight()-2 ; idxLevel >= 2 ; --idxLevel){ + for(int idxLevel = FMath::Min(tree->getHeight() - 2, FAbstractAlgorithm::lowerWorkingLevel - 1) ; idxLevel >= FAbstractAlgorithm::upperWorkingLevel ; --idxLevel){ int idxSubGroup = 0; for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){ @@ -799,7 +805,7 @@ protected: void transferPass(){ FLOG( FTic timer; ); FLOG( FTic timerInBlock; FTic timerOutBlock; ); - for(int idxLevel = tree->getHeight()-1 ; idxLevel >= 2 ; --idxLevel){ + for(int idxLevel = FAbstractAlgorithm::lowerWorkingLevel-1 ; idxLevel >= FAbstractAlgorithm::upperWorkingLevel ; --idxLevel){ FLOG( timerInBlock.tic() ); for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){ starpu_insert_task(&m2l_cl_in, @@ -848,7 +854,7 @@ protected: void downardPass(){ FLOG( FTic timer; ); - for(int idxLevel = 2 ; idxLevel <= tree->getHeight()-2 ; ++idxLevel){ 
+ for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel - 1 ; ++idxLevel){ int idxSubGroup = 0; for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){ diff --git a/Src/GroupTree/Core/FGroupTaskStarpuMpiAlgorithm.hpp b/Src/GroupTree/Core/FGroupTaskStarpuMpiAlgorithm.hpp index a2d4a1ed0f49ff5666c875f451881f697f398880..c6301bec83d7fa20233dfdce3b4db2230371491e 100644 --- a/Src/GroupTree/Core/FGroupTaskStarpuMpiAlgorithm.hpp +++ b/Src/GroupTree/Core/FGroupTaskStarpuMpiAlgorithm.hpp @@ -54,7 +54,7 @@ template <class OctreeClass, class CellContainerClass, class KernelClass, class , class StarPUOpenClWrapperClass = FStarPUOpenClWrapper<KernelClass, FOpenCLDeviceWrapper<KernelClass>> #endif > -class FGroupTaskStarPUMpiAlgorithm { +class FGroupTaskStarPUMpiAlgorithm : public FAbstractAlgorithm { protected: typedef FGroupTaskStarPUMpiAlgorithm<OctreeClass, CellContainerClass, KernelClass, ParticleGroupClass, StarPUCpuWrapperClass #ifdef SCALFMM_ENABLE_CUDA_KERNEL @@ -151,6 +151,8 @@ public: FAssertLF(tree, "tree cannot be null"); FAssertLF(inKernels, "kernels cannot be null"); + FAbstractAlgorithm::setNbLevelsInTree(tree->getHeight()); + struct starpu_conf conf; FAssertLF(starpu_conf_init(&conf) == 0); FStarPUFmmPriorities::Controller().init(&conf, tree->getHeight(), inKernels); @@ -254,7 +256,11 @@ public: starpu_shutdown(); } - void execute(const unsigned operationsToProceed = FFmmNearAndFarFields){ +protected: + /** + * Runs the complete algorithm. 
+ */ + void executeCore(const unsigned operationsToProceed) override { FLOG( FLog::Controller << "\tStart FGroupTaskStarPUMpiAlgorithm\n" ); const bool directOnly = (tree->getHeight() <= 2); @@ -291,7 +297,7 @@ public: starpu_pause(); } -protected: + void initCodelet(){ memset(&p2m_cl, 0, sizeof(p2m_cl)); #ifdef STARPU_USE_CPU @@ -921,7 +927,7 @@ protected: FAssertLF(tree->getHeight() == int(remoteCellGroups.size())); const bool directOnly = (tree->getHeight() <= 2); if(!directOnly){ - for(int idxLevel = 0 ; idxLevel < tree->getHeight() ; ++idxLevel){ + for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel){ for(int idxHandle = 0 ; idxHandle < int(remoteCellGroups[idxLevel].size()) ; ++idxHandle){ if(remoteCellGroups[idxLevel][idxHandle].ptrSymb){ FAssertLF(remoteCellGroups[idxLevel][idxHandle].ptrUp); @@ -1328,7 +1334,7 @@ protected: void upwardPass(){ FLOG( FTic timer; ); - for(int idxLevel = tree->getHeight()-2 ; idxLevel >= 2 ; --idxLevel){ + for(int idxLevel = FMath::Min(tree->getHeight() - 2, FAbstractAlgorithm::lowerWorkingLevel - 1) ; idxLevel >= FAbstractAlgorithm::upperWorkingLevel ; --idxLevel){ int idxSubGroup = 0; for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) @@ -1507,7 +1513,7 @@ protected: void transferPassMpi(){ FLOG( FTic timer; ); - for(int idxLevel = tree->getHeight()-1 ; idxLevel >= 2 ; --idxLevel){ + for(int idxLevel = FAbstractAlgorithm::lowerWorkingLevel-1 ; idxLevel >= FAbstractAlgorithm::upperWorkingLevel ; --idxLevel){ for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){ for(int idxInteraction = 0; idxInteraction < int(externalInteractionsAllLevelMpi[idxLevel][idxGroup].size()) ; ++idxInteraction){ const int interactionid = externalInteractionsAllLevelMpi[idxLevel][idxGroup][idxInteraction].otherBlockId; @@ -1537,7 +1543,7 @@ protected: void transferPass(){ FLOG( FTic timer; ); FLOG( FTic timerInBlock; FTic 
timerOutBlock; ); - for(int idxLevel = tree->getHeight()-1 ; idxLevel >= 2 ; --idxLevel){ + for(int idxLevel = FAbstractAlgorithm::lowerWorkingLevel-1 ; idxLevel >= FAbstractAlgorithm::upperWorkingLevel ; --idxLevel){ FLOG( timerInBlock.tic() ); for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){ starpu_insert_task(&m2l_cl_in, @@ -1586,7 +1592,7 @@ protected: void downardPass(){ FLOG( FTic timer; ); - for(int idxLevel = 2 ; idxLevel <= tree->getHeight()-2 ; ++idxLevel){ + for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel - 1 ; ++idxLevel){ ///////////////////////////////////////////////////////////// // Exchange for MPI ///////////////////////////////////////////////////////////// diff --git a/Src/GroupTree/Core/FGroupTreeDyn.hpp b/Src/GroupTree/Core/FGroupTreeDyn.hpp index c3d5ccf88bd01b7704eada96970d24e6eba4951c..7d9bcaffe3055ed78aaae831c08b27a24fdad9b6 100644 --- a/Src/GroupTree/Core/FGroupTreeDyn.hpp +++ b/Src/GroupTree/Core/FGroupTreeDyn.hpp @@ -62,7 +62,12 @@ public: template<class OctreeClass> FGroupTreeDyn(const int inTreeHeight, const int inNbElementsPerBlock, OctreeClass*const inOctreeSrc, const size_t inSymbSizePerLevel[], const size_t inPoleSizePerLevel[], const size_t inLocalSizePerLevel[], - std::function<void(const MortonIndex, const void*, size_t*, size_t*)> GetSizeFunc) + std::function<void(const MortonIndex, const void*, size_t*, size_t*)> GetSizeFunc, + std::function<void(const MortonIndex mindex, + unsigned char* symbBuff, const size_t symbSize, + unsigned char* upBuff, const size_t upSize, + unsigned char* downBuff, const size_t downSize, + const int level)> BuildCellFunc) : treeHeight(inTreeHeight), nbElementsPerBlock(inNbElementsPerBlock), cellBlocksPerLevel(nullptr), boxCenter(inOctreeSrc->getBoxCenter()), boxCorner(inOctreeSrc->getBoxCenter(),-(inOctreeSrc->getBoxWidth()/2)), boxWidth(inOctreeSrc->getBoxWidth()), 
boxWidthAtLeafLevel(inOctreeSrc->getBoxWidth()/FReal(1<<(inTreeHeight-1))){ @@ -91,8 +96,8 @@ public: // Create a block with the apropriate parameters CellGroupClass*const newBlock = new CellGroupClass(blockIteratorInOctree.getCurrentGlobalIndex(), - octreeIterator.getCurrentGlobalIndex()+1, - sizeOfBlock, inSymbSizePerLevel[idxLevel], + octreeIterator.getCurrentGlobalIndex()+1, + sizeOfBlock, inSymbSizePerLevel[idxLevel], inPoleSizePerLevel[idxLevel], inLocalSizePerLevel[idxLevel]); { typename OctreeClass::Iterator blockIteratorCellInOctree = blockIteratorInOctree; @@ -102,7 +107,7 @@ public: const MortonIndex newNodeIndex = blockIteratorCellInOctree.getCurrentCell()->getMortonIndex(); const FTreeCoordinate newNodeCoordinate = blockIteratorCellInOctree.getCurrentCell()->getCoordinate(); // Add cell - newBlock->newCell(newNodeIndex, cellIdInBlock); + newBlock->newCell(newNodeIndex, cellIdInBlock, BuildCellFunc, idxLevel); CompositeCellClass newNode = newBlock->getCompleteCell(newNodeIndex); newNode.setMortonIndex(newNodeIndex); @@ -121,8 +126,8 @@ public: int cellIdInBlock = 0; while(cellIdInBlock != sizeOfBlock){ GetSizeFunc(blockIteratorLeafInOctree.getCurrentCell()->getMortonIndex(), - blockIteratorLeafInOctree.getCurrentLeaf()->getSrc(), - &symbSizePerLeaf[cellIdInBlock],&downSizePerDown[cellIdInBlock]); + blockIteratorLeafInOctree.getCurrentLeaf()->getSrc(), + &symbSizePerLeaf[cellIdInBlock],&downSizePerDown[cellIdInBlock]); cellIdInBlock += 1; blockIteratorLeafInOctree.moveRight(); @@ -130,8 +135,8 @@ public: } ParticleGroupClass*const newParticleBlock = new ParticleGroupClass(blockIteratorInOctree.getCurrentGlobalIndex(), - octreeIterator.getCurrentGlobalIndex()+1, - sizeOfBlock, symbSizePerLeaf.get(), downSizePerDown.get()); + octreeIterator.getCurrentGlobalIndex()+1, + sizeOfBlock, symbSizePerLeaf.get(), downSizePerDown.get()); { typename OctreeClass::Iterator blockIteratorLeafInOctree = blockIteratorInOctree; // Initialize each cell of the block @@ -173,8 
+178,8 @@ public: // Create a block with the apropriate parameters CellGroupClass*const newBlock = new CellGroupClass(blockIteratorInOctree.getCurrentGlobalIndex(), - octreeIterator.getCurrentGlobalIndex()+1, - sizeOfBlock, inSymbSizePerLevel[idxLevel], + octreeIterator.getCurrentGlobalIndex()+1, + sizeOfBlock, inSymbSizePerLevel[idxLevel], inPoleSizePerLevel[idxLevel], inLocalSizePerLevel[idxLevel]); // Initialize each cell of the block @@ -182,7 +187,7 @@ public: while(cellIdInBlock != sizeOfBlock){ const MortonIndex newNodeIndex = blockIteratorInOctree.getCurrentCell()->getMortonIndex(); const FTreeCoordinate newNodeCoordinate = blockIteratorInOctree.getCurrentCell()->getCoordinate(); - newBlock->newCell(newNodeIndex, cellIdInBlock); + newBlock->newCell(newNodeIndex, cellIdInBlock, BuildCellFunc, idxLevel); CompositeCellClass newNode = newBlock->getCompleteCell(newNodeIndex); newNode.setMortonIndex(newNodeIndex); @@ -212,15 +217,24 @@ public: * If no limite give inLeftLimite = -1 */ FGroupTreeDyn(const int inTreeHeight, const FReal inBoxWidth, const FPoint<FReal>& inBoxCenter, - const int inNbElementsPerBlock, const size_t inSymbSizePerLevel[], + const int inNbElementsPerBlock, const size_t inSymbSizePerLevel[], const size_t inPoleSizePerLevel[], const size_t inLocalSizePerLevel[], - UnknownDescriptor<FReal> inParticlesContainer[], const FSize nbParticles, - std::function<void(const MortonIndex, const UnknownDescriptor<FReal>[], - const FSize, size_t*, size_t*)> GetSizeFunc, - const bool particlesAreSorted = false, MortonIndex inLeftLimite = -1): - treeHeight(inTreeHeight),nbElementsPerBlock(inNbElementsPerBlock),cellBlocksPerLevel(nullptr), - boxCenter(inBoxCenter), boxCorner(inBoxCenter,-(inBoxWidth/2)), boxWidth(inBoxWidth), - boxWidthAtLeafLevel(inBoxWidth/FReal(1<<(inTreeHeight-1))){ + UnknownDescriptor<FReal> inParticlesContainer[], const FSize nbParticles, + std::function<void(const MortonIndex, const UnknownDescriptor<FReal>[], + const FSize, size_t*, 
size_t*)> GetSizeFunc, + std::function<void(const MortonIndex, const UnknownDescriptor<FReal> [], + const FSize , + unsigned char* , const size_t, + unsigned char* , const size_t)> InitLeafFunc, + std::function<void(const MortonIndex mindex, + unsigned char* symbBuff, const size_t symbSize, + unsigned char* upBuff, const size_t upSize, + unsigned char* downBuff, const size_t downSize, + const int level)> BuildCellFunc, + const bool particlesAreSorted = false, MortonIndex inLeftLimite = -1): + treeHeight(inTreeHeight),nbElementsPerBlock(inNbElementsPerBlock),cellBlocksPerLevel(nullptr), + boxCenter(inBoxCenter), boxCorner(inBoxCenter,-(inBoxWidth/2)), boxWidth(inBoxWidth), + boxWidthAtLeafLevel(inBoxWidth/FReal(1<<(inTreeHeight-1))){ cellBlocksPerLevel = new std::vector<CellGroupClass*>[treeHeight]; @@ -231,8 +245,8 @@ public: if(particlesAreSorted == false){ for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart){ const FTreeCoordinate host = FCoordinateComputer::GetCoordinateFromPositionAndCorner<FReal>(this->boxCorner, this->boxWidth, - treeHeight, - inParticlesContainer[idxPart].pos); + treeHeight, + inParticlesContainer[idxPart].pos); const MortonIndex particleIndex = host.getMortonIndex(treeHeight-1); inParticlesContainer[idxPart].mindex = particleIndex; inParticlesContainer[idxPart].originalIndex = idxPart; @@ -276,12 +290,12 @@ public: // Create a group CellGroupClass*const newBlock = new CellGroupClass(currentBlockIndexes[0], - currentBlockIndexes[sizeOfBlock-1]+1, - sizeOfBlock, inSymbSizePerLevel[idxLevel], - inPoleSizePerLevel[idxLevel], inLocalSizePerLevel[idxLevel]); + currentBlockIndexes[sizeOfBlock-1]+1, + sizeOfBlock, inSymbSizePerLevel[idxLevel], + inPoleSizePerLevel[idxLevel], inLocalSizePerLevel[idxLevel]); { for(int cellIdInBlock = 0; cellIdInBlock != sizeOfBlock ; ++cellIdInBlock){ - newBlock->newCell(currentBlockIndexes[cellIdInBlock], cellIdInBlock); + newBlock->newCell(currentBlockIndexes[cellIdInBlock], cellIdInBlock, BuildCellFunc, 
idxLevel); CompositeCellClass newNode = newBlock->getCompleteCell(currentBlockIndexes[cellIdInBlock]); newNode.setMortonIndex(currentBlockIndexes[cellIdInBlock]); @@ -302,8 +316,8 @@ public: } ParticleGroupClass*const newParticleBlock = new ParticleGroupClass(currentBlockIndexes[0], - currentBlockIndexes[sizeOfBlock-1]+1, - sizeOfBlock, symbSizePerLeaf.get(), downSizePerDown.get()); + currentBlockIndexes[sizeOfBlock-1]+1, + sizeOfBlock, symbSizePerLeaf.get(), downSizePerDown.get()); // Init cells FSize offsetParts = firstParticle; @@ -311,10 +325,11 @@ public: // Add leaf newParticleBlock->newLeaf(currentBlockIndexes[cellIdInBlock], cellIdInBlock); - BasicAttachedClass attachedLeaf = newParticleBlock->template getLeaf<BasicAttachedClass>(currentBlockIndexes[cellIdInBlock]); - attachedLeaf.init(currentBlockIndexes[cellIdInBlock], &inParticlesContainer[offsetParts], - nbParticlesPerLeaf[cellIdInBlock], - symbSizePerLeaf[cellIdInBlock], downSizePerDown[cellIdInBlock]); + InitLeafFunc(currentBlockIndexes[cellIdInBlock], &inParticlesContainer[offsetParts], + nbParticlesPerLeaf[cellIdInBlock], + newParticleBlock->getLeafSymbBuffer(currentBlockIndexes[cellIdInBlock]), symbSizePerLeaf[cellIdInBlock], + newParticleBlock->getLeafDownBuffer(currentBlockIndexes[cellIdInBlock]), downSizePerDown[cellIdInBlock]); + offsetParts += nbParticlesPerLeaf[cellIdInBlock]; } @@ -376,12 +391,12 @@ public: if(sizeOfBlock == nbElementsPerBlock || (sizeOfBlock && iterChildCells == iterChildEndCells)){ // Create a group CellGroupClass*const newBlock = new CellGroupClass(currentBlockIndexes[0], - currentBlockIndexes[sizeOfBlock-1]+1, - sizeOfBlock,inSymbSizePerLevel[idxLevel], - inPoleSizePerLevel[idxLevel], inLocalSizePerLevel[idxLevel]); + currentBlockIndexes[sizeOfBlock-1]+1, + sizeOfBlock,inSymbSizePerLevel[idxLevel], + inPoleSizePerLevel[idxLevel], inLocalSizePerLevel[idxLevel]); // Init cells for(int cellIdInBlock = 0; cellIdInBlock != sizeOfBlock ; ++cellIdInBlock){ - 
newBlock->newCell(currentBlockIndexes[cellIdInBlock], cellIdInBlock); + newBlock->newCell(currentBlockIndexes[cellIdInBlock], cellIdInBlock, BuildCellFunc, idxLevel); CompositeCellClass newNode = newBlock->getCompleteCell(currentBlockIndexes[cellIdInBlock]); newNode.setMortonIndex(currentBlockIndexes[cellIdInBlock]); @@ -413,16 +428,25 @@ public: * this could be an asset). */ FGroupTreeDyn(const int inTreeHeight, const FReal inBoxWidth, const FPoint<FReal>& inBoxCenter, - const int inNbElementsPerBlock, const size_t inSymbSizePerLevel[], + const int inNbElementsPerBlock, const size_t inSymbSizePerLevel[], const size_t inPoleSizePerLevel[], const size_t inLocalSizePerLevel[], - UnknownDescriptor<FReal> inParticlesContainer[], const FSize nbParticles, - std::function<void(const MortonIndex, const UnknownDescriptor<FReal>[], - const FSize, size_t*, size_t*)> GetSizeFunc, - const bool particlesAreSorted, const bool oneParent, - const FReal inCoverRatio = 0.0, MortonIndex inLeftLimite = -1): - treeHeight(inTreeHeight),nbElementsPerBlock(inNbElementsPerBlock),cellBlocksPerLevel(nullptr), - boxCenter(inBoxCenter), boxCorner(inBoxCenter,-(inBoxWidth/2)), boxWidth(inBoxWidth), - boxWidthAtLeafLevel(inBoxWidth/FReal(1<<(inTreeHeight-1))){ + UnknownDescriptor<FReal> inParticlesContainer[], const FSize nbParticles, + std::function<void(const MortonIndex, const UnknownDescriptor<FReal>[], + const FSize, size_t*, size_t*)> GetSizeFunc, + std::function<void(const MortonIndex, const UnknownDescriptor<FReal> [], + const FSize , + unsigned char* , const size_t, + unsigned char* , const size_t)> InitLeafFunc, + std::function<void(const MortonIndex mindex, + unsigned char* symbBuff, const size_t symbSize, + unsigned char* upBuff, const size_t upSize, + unsigned char* downBuff, const size_t downSize, + const int level)> BuildCellFunc, + const bool particlesAreSorted, const bool oneParent, + const FReal inCoverRatio = 0.0, MortonIndex inLeftLimite = -1): + 
treeHeight(inTreeHeight),nbElementsPerBlock(inNbElementsPerBlock),cellBlocksPerLevel(nullptr), + boxCenter(inBoxCenter), boxCorner(inBoxCenter,-(inBoxWidth/2)), boxWidth(inBoxWidth), + boxWidthAtLeafLevel(inBoxWidth/FReal(1<<(inTreeHeight-1))){ FAssertLF(inCoverRatio == 0.0 || oneParent == true, "If a ratio is choosen oneParent should be turned on"); const bool userCoverRatio = (inCoverRatio != 0.0); @@ -436,8 +460,8 @@ public: if(particlesAreSorted == false){ for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart){ const FTreeCoordinate host = FCoordinateComputer::GetCoordinateFromPositionAndCorner<FReal>(this->boxCorner, this->boxWidth, - treeHeight, - inParticlesContainer[idxPart].pos); + treeHeight, + inParticlesContainer[idxPart].pos); const MortonIndex particleIndex = host.getMortonIndex(treeHeight-1); inParticlesContainer[idxPart].mindex = particleIndex; inParticlesContainer[idxPart].originalIndex = idxPart; @@ -483,12 +507,12 @@ public: // Create a group CellGroupClass*const newBlock = new CellGroupClass(currentBlockIndexes[0], - currentBlockIndexes[sizeOfBlock-1]+1, - sizeOfBlock, inSymbSizePerLevel[idxLevel], - inPoleSizePerLevel[idxLevel], inLocalSizePerLevel[idxLevel]); + currentBlockIndexes[sizeOfBlock-1]+1, + sizeOfBlock, inSymbSizePerLevel[idxLevel], + inPoleSizePerLevel[idxLevel], inLocalSizePerLevel[idxLevel]); { for(int cellIdInBlock = 0; cellIdInBlock != sizeOfBlock ; ++cellIdInBlock){ - newBlock->newCell(currentBlockIndexes[cellIdInBlock], cellIdInBlock); + newBlock->newCell(currentBlockIndexes[cellIdInBlock], cellIdInBlock, BuildCellFunc, idxLevel); CompositeCellClass newNode = newBlock->getCompleteCell(currentBlockIndexes[cellIdInBlock]); newNode.setMortonIndex(currentBlockIndexes[cellIdInBlock]); @@ -509,8 +533,8 @@ public: } ParticleGroupClass*const newParticleBlock = new ParticleGroupClass(currentBlockIndexes[0], - currentBlockIndexes[sizeOfBlock-1]+1, - sizeOfBlock, symbSizePerLeaf.get(), downSizePerDown.get()); + 
currentBlockIndexes[sizeOfBlock-1]+1, + sizeOfBlock, symbSizePerLeaf.get(), downSizePerDown.get()); // Init cells FSize offsetParts = firstParticle; @@ -518,10 +542,11 @@ public: // Add leaf newParticleBlock->newLeaf(currentBlockIndexes[cellIdInBlock], cellIdInBlock); - BasicAttachedClass attachedLeaf = newParticleBlock->template getLeaf<BasicAttachedClass>(currentBlockIndexes[cellIdInBlock]); - attachedLeaf.init(currentBlockIndexes[cellIdInBlock], &inParticlesContainer[offsetParts], - nbParticlesPerLeaf[cellIdInBlock], - symbSizePerLeaf[cellIdInBlock], downSizePerDown[cellIdInBlock]); + InitLeafFunc(currentBlockIndexes[cellIdInBlock], &inParticlesContainer[offsetParts], + nbParticlesPerLeaf[cellIdInBlock], + newParticleBlock->getLeafSymbBuffer(currentBlockIndexes[cellIdInBlock]), symbSizePerLeaf[cellIdInBlock], + newParticleBlock->getLeafDownBuffer(currentBlockIndexes[cellIdInBlock]), downSizePerDown[cellIdInBlock]); + offsetParts += nbParticlesPerLeaf[cellIdInBlock]; } @@ -584,12 +609,12 @@ public: if(sizeOfBlock == nbElementsPerBlock || (sizeOfBlock && iterChildCells == iterChildEndCells)){ // Create a group CellGroupClass*const newBlock = new CellGroupClass(currentBlockIndexes[0], - currentBlockIndexes[sizeOfBlock-1]+1, - sizeOfBlock,inSymbSizePerLevel[idxLevel], - inPoleSizePerLevel[idxLevel], inLocalSizePerLevel[idxLevel]); + currentBlockIndexes[sizeOfBlock-1]+1, + sizeOfBlock,inSymbSizePerLevel[idxLevel], + inPoleSizePerLevel[idxLevel], inLocalSizePerLevel[idxLevel]); // Init cells for(int cellIdInBlock = 0; cellIdInBlock != sizeOfBlock ; ++cellIdInBlock){ - newBlock->newCell(currentBlockIndexes[cellIdInBlock], cellIdInBlock); + newBlock->newCell(currentBlockIndexes[cellIdInBlock], cellIdInBlock, BuildCellFunc, idxLevel); CompositeCellClass newNode = newBlock->getCompleteCell(currentBlockIndexes[cellIdInBlock]); newNode.setMortonIndex(currentBlockIndexes[cellIdInBlock]); @@ -640,12 +665,12 @@ public: if(sizeOfBlock){ // Create a group CellGroupClass*const 
newBlock = new CellGroupClass(currentBlockIndexes[0], - currentBlockIndexes[sizeOfBlock-1]+1, - sizeOfBlock,inSymbSizePerLevel[idxLevel], - inPoleSizePerLevel[idxLevel], inLocalSizePerLevel[idxLevel]); + currentBlockIndexes[sizeOfBlock-1]+1, + sizeOfBlock,inSymbSizePerLevel[idxLevel], + inPoleSizePerLevel[idxLevel], inLocalSizePerLevel[idxLevel]); // Init cells for(int cellIdInBlock = 0; cellIdInBlock != sizeOfBlock ; ++cellIdInBlock){ - newBlock->newCell(currentBlockIndexes[cellIdInBlock], cellIdInBlock); + newBlock->newCell(currentBlockIndexes[cellIdInBlock], cellIdInBlock, BuildCellFunc, idxLevel); CompositeCellClass newNode = newBlock->getCompleteCell(currentBlockIndexes[cellIdInBlock]); newNode.setMortonIndex(currentBlockIndexes[cellIdInBlock]); diff --git a/Src/GroupTree/StarPUUtils/FStarPUFmmPriorities.hpp b/Src/GroupTree/StarPUUtils/FStarPUFmmPriorities.hpp index e13eaf22a77d7a58e75bf0ad05fc170201064d4d..04ba687b6409e0f5f38ce3052d717e49cdd9f5f8 100644 --- a/Src/GroupTree/StarPUUtils/FStarPUFmmPriorities.hpp +++ b/Src/GroupTree/StarPUUtils/FStarPUFmmPriorities.hpp @@ -45,17 +45,16 @@ public: return controller; } - static void InitSchedulerCallback(unsigned sched_ctx_id, - struct _starpu_heteroprio_center_policy_heteroprio *heteroprio){ - Controller().initSchedulerCallback(sched_ctx_id, heteroprio); + static void InitSchedulerCallback(unsigned sched_ctx_id, void* heteroprio){ + Controller().initSchedulerCallback(sched_ctx_id, (struct _starpu_heteroprio_center_policy_heteroprio*)heteroprio); } void init(struct starpu_conf* conf, const int inTreeHeight, FStarPUKernelCapacities* inCapacities){ capacities = inCapacities; - conf->sched_policy = &_starpu_sched_heteroprio_policy, - initialize_heteroprio_center_policy_callback = &InitSchedulerCallback; + conf->sched_policy = &_starpu_sched_heteroprio_policy; + starpu_heteroprio_set_callback(&InitSchedulerCallback); treeHeight = inTreeHeight; diff --git a/Src/GroupTree/StarPUUtils/FStarPUHeteoprio.hpp 
b/Src/GroupTree/StarPUUtils/FStarPUHeteoprio.hpp index 3c49656aeb33941c187213adcf9d8037a8046c59..6ac1e160af65915976427b50b5b52abcb9a526da 100644 --- a/Src/GroupTree/StarPUUtils/FStarPUHeteoprio.hpp +++ b/Src/GroupTree/StarPUUtils/FStarPUHeteoprio.hpp @@ -59,34 +59,27 @@ * #include "../../Src/GroupTree/StarPUUtils/FStarPUHeteoprio.hpp" * * void initSchedulerCallback(unsigned sched_ctx_id, - * struct _starpu_heteroprio_center_policy_heteroprio *heteroprio){ + * struct _starpu_heteroprio_center_policy_heteroprio *heteroprio){ * // CPU uses 3 buckets - * heteroprio->nb_prio_per_arch_index[FSTARPU_CPU_IDX] = 3; + * starpu_heteroprio_set_nb_prios(heteroprio, FSTARPU_CPU_IDX, 3); * // It uses direct mapping idx => idx * for(unsigned idx = 0 ; idx < 3 ; ++idx){ - * heteroprio->prio_mapping_per_arch_index[FSTARPU_CPU_IDX][idx] = idx; - * // We say CPU is faster - * heteroprio->buckets[idx].factor_base_arch_index = FSTARPU_CPU_IDX; - * // We must say that CPU uses these buckets - * heteroprio->buckets[idx].valide_archs |= STARPU_CPU; + * starpu_heteroprio_set_mapping(heteroprio, FSTARPU_CPU_IDX, idx, idx); + * starpu_heteroprio_set_faster_arch(heteroprio, FSTARPU_CPU_IDX, idx); * } * #ifdef STARPU_USE_OPENCL * // OpenCL is enabled and uses 2 buckets - * heteroprio->nb_prio_per_arch_index[FSTARPU_OPENCL_IDX] = 2; + * starpu_heteroprio_set_nb_prios(heteroprio, FSTARPU_OPENCL_IDX, 2); * // OpenCL will first look to priority 2 - * heteroprio->prio_mapping_per_arch_index[FSTARPU_OPENCL_IDX][0] = 2; - * // We tell the scheduler that OpenCL uses this bucket - * heteroprio->buckets[2].valide_archs |= STARPU_OPENCL; + * starpu_heteroprio_set_mapping(heteroprio, FSTARPU_OPENCL_IDX, 0, 2); * // For this bucket OpenCL is the fastest - * heteroprio->buckets[2].factor_base_arch_index = FSTARPU_OPENCL_IDX; + * starpu_heteroprio_set_faster_arch(heteroprio, FSTARPU_OPENCL_IDX, 2); * // And CPU is 4 times slower - * heteroprio->buckets[2].slow_factors_per_index[FSTARPU_CPU_IDX] = 4.0f; + * 
starpu_heteroprio_set_arch_slow_factor(heteroprio, FSTARPU_CPU_IDX, 2, 4.0f); * - * heteroprio->prio_mapping_per_arch_index[FSTARPU_OPENCL_IDX][1] = 1; - * // We tell the scheduler that OpenCL uses this bucket - * heteroprio->buckets[1].valide_archs |= STARPU_OPENCL; - * // We let the CPU as the fastest PU and tell that OpenCL is 1.7 times slower - * heteroprio->buckets[1].slow_factors_per_index[FSTARPU_OPENCL_IDX] = 1.7f; + * starpu_heteroprio_set_mapping(heteroprio, FSTARPU_OPENCL_IDX, 1, 1); + * // We let the CPU as the fastest and tell that OpenCL is 1.7 times slower + * starpu_heteroprio_set_arch_slow_factor(heteroprio, FSTARPU_OPENCL_IDX, 1, 1.7f); * #endif * } * @@ -346,9 +339,46 @@ struct _starpu_heteroprio_center_policy_heteroprio unsigned nb_workers_per_arch_index[FSTARPU_NB_TYPES]; }; +/********************************************************************************/ +/********************************************************************************/ + /* This is the callback that must init the scheduler buckets */ -/*extern*/ void (*initialize_heteroprio_center_policy_callback)(unsigned sched_ctx_id, - struct _starpu_heteroprio_center_policy_heteroprio *heteroprio) = NULL; +typedef void (*Heteroprio_callback_type)(unsigned sched_ctx_id, void* heteroprio); +/*extern*/ Heteroprio_callback_type initialize_heteroprio_center_policy_callback = NULL; + + +inline void starpu_heteroprio_set_callback(Heteroprio_callback_type user_callback){ + initialize_heteroprio_center_policy_callback = user_callback; +} + +/** Tell how many prio there are for a given arch */ +inline void starpu_heteroprio_set_nb_prios(void* heterodata, const FStarPUTypes arch, const unsigned max_prio){ + assert(max_prio < HETEROPRIO_MAX_PRIO); + ((struct _starpu_heteroprio_center_policy_heteroprio*)heterodata)->nb_prio_per_arch_index[arch] = max_prio; +} + +/** Set the mapping for a given arch prio=>bucket */ +inline void starpu_heteroprio_set_mapping(void* heterodata, const FStarPUTypes arch, 
const unsigned source_prio, const unsigned dest_bucket_id){ + assert(dest_bucket_id < HETEROPRIO_MAX_PRIO); + ((struct _starpu_heteroprio_center_policy_heteroprio*)heterodata)->prio_mapping_per_arch_index[arch][source_prio] = dest_bucket_id; + ((struct _starpu_heteroprio_center_policy_heteroprio*)heterodata)->buckets[dest_bucket_id].valide_archs |= FStarPUTypesToArch[arch]; +} + +/** Tell which arch is the faster for the tasks of a bucket (optional) */ +inline void starpu_heteroprio_set_faster_arch(void* heterodata, const FStarPUTypes arch, const unsigned bucket_id){ + assert(bucket_id < HETEROPRIO_MAX_PRIO); + ((struct _starpu_heteroprio_center_policy_heteroprio*)heterodata)->buckets[bucket_id].factor_base_arch_index = arch; + ((struct _starpu_heteroprio_center_policy_heteroprio*)heterodata)->buckets[bucket_id].slow_factors_per_index[arch] = 0; +} + +/** Tell how slow is a arch for the tasks of a bucket (optional) */ +inline void starpu_heteroprio_set_arch_slow_factor(void* heterodata, const FStarPUTypes arch, const unsigned bucket_id, const float slow_factor){ + assert(bucket_id < HETEROPRIO_MAX_PRIO); + ((struct _starpu_heteroprio_center_policy_heteroprio*)heterodata)->buckets[bucket_id].slow_factors_per_index[arch] = slow_factor; +} + +/********************************************************************************/ +/********************************************************************************/ /* Init the scheduler - This will call the init callback! 
*/ static void initialize_heteroprio_center_policy(unsigned sched_ctx_id) diff --git a/Src/Kernels/Interpolation/FInterpMatrixKernel.hpp b/Src/Kernels/Interpolation/FInterpMatrixKernel.hpp index 0558ae5cdc3535c7104c71aef6d2855d653d61a0..5f3ef5f9ce435daa70170196e6d8985d10576f3a 100644 --- a/Src/Kernels/Interpolation/FInterpMatrixKernel.hpp +++ b/Src/Kernels/Interpolation/FInterpMatrixKernel.hpp @@ -433,6 +433,107 @@ struct FInterpMatrixKernelLJ : FInterpAbstractMatrixKernel<FReal> } }; + +/// One over (a+r^2) +template <class FReal> +struct FInterpMatrixKernelAPLUSRR : FInterpAbstractMatrixKernel<FReal> +{ + static const KERNEL_FUNCTION_TYPE Type = NON_HOMOGENEOUS; + static const unsigned int NCMP = 1; //< number of components + static const unsigned int NPV = 1; //< dim of physical values + static const unsigned int NPOT = 1; //< dim of potentials + static const unsigned int NRHS = 1; //< dim of mult exp + static const unsigned int NLHS = 1; //< dim of loc exp + + const FReal CoreWidth; + + FInterpMatrixKernelAPLUSRR(const FReal inCoreWidth = .25) + : CoreWidth(inCoreWidth) + {} + + // copy ctor + FInterpMatrixKernelAPLUSRR(const FInterpMatrixKernelAPLUSRR& other) + : CoreWidth(other.CoreWidth) + {} + + static const char* getID() { return "ONE_OVER_A_PLUS_RR"; } + + // returns position in reduced storage + int getPosition(const unsigned int) const + {return 0;} + + // evaluate interaction + template <class ValueClass> + ValueClass evaluate(const ValueClass& x1, const ValueClass& y1, const ValueClass& z1, + const ValueClass& x2, const ValueClass& y2, const ValueClass& z2) const + { + const ValueClass diffx = (x1-x2); + const ValueClass diffy = (y1-y2); + const ValueClass diffz = (z1-z2); + return FMath::One<ValueClass>() / (FMath::ConvertTo<ValueClass,FReal>(CoreWidth) + // WHY FReal?? 
+ diffx*diffx + + diffy*diffy + + diffz*diffz); + } + + // evaluate interaction (blockwise) + template <class ValueClass> + void evaluateBlock(const ValueClass& x1, const ValueClass& y1, const ValueClass& z1, + const ValueClass& x2, const ValueClass& y2, const ValueClass& z2, ValueClass* block) const + { + block[0]=this->evaluate(x1,y1,z1,x2,y2,z2); + } + + // evaluate interaction and derivative (blockwise) + template <class ValueClass> + void evaluateBlockAndDerivative(const ValueClass& x1, const ValueClass& y1, const ValueClass& z1, + const ValueClass& x2, const ValueClass& y2, const ValueClass& z2, + ValueClass block[1], ValueClass blockDerivative[3]) const + { + const ValueClass diffx = (x1-x2); + const ValueClass diffy = (y1-y2); + const ValueClass diffz = (z1-z2); + const ValueClass r2 = (diffx*diffx + + diffy*diffy + + diffz*diffz); + const ValueClass one_over_a_plus_r2 = FMath::One<ValueClass>() / (FMath::ConvertTo<ValueClass,FReal>(CoreWidth)+r2); + const ValueClass one_over_a_plus_r2_squared = one_over_a_plus_r2*one_over_a_plus_r2; + + block[0] = one_over_a_plus_r2; + + // TODO Fix derivative + const ValueClass coef = FMath::ConvertTo<ValueClass,FReal>(-2.) * one_over_a_plus_r2_squared; + blockDerivative[0] = coef * diffx; + blockDerivative[1] = coef * diffy; + blockDerivative[2] = coef * diffz; + + } + + FReal getScaleFactor(const FReal, const int) const + { + // return 1 because non homogeneous kernel functions cannot be scaled!!! + return FReal(1.0); + } + + FReal getScaleFactor(const FReal) const + { + // return 1 because non homogeneous kernel functions cannot be scaled!!! 
+ return FReal(1.0); + } + + FReal evaluate(const FPoint<FReal>& p1, const FPoint<FReal>& p2) const{ + return evaluate<FReal>(p1.getX(), p1.getY(), p1.getZ(), p2.getX(), p2.getY(), p2.getZ()); + } + void evaluateBlock(const FPoint<FReal>& p1, const FPoint<FReal>& p2, FReal* block) const{ + evaluateBlock<FReal>(p1.getX(), p1.getY(), p1.getZ(), p2.getX(), p2.getY(), p2.getZ(), block); + } + void evaluateBlockAndDerivative(const FPoint<FReal>& p1, const FPoint<FReal>& p2, + FReal block[1], FReal blockDerivative[3]) const { + evaluateBlockAndDerivative<FReal>(p1.getX(), p1.getY(), p1.getZ(), p2.getX(), p2.getY(), p2.getZ(), block, blockDerivative); + } +}; + + //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// // diff --git a/Src/Kernels/Uniform/FUnifKernel.hpp b/Src/Kernels/Uniform/FUnifKernel.hpp index 44ad33a64df11b57f6d7c5b92a8c49a364cb7ca7..d7d003b1a3ad1ecdcc01543e558bb0cadd7175ad 100644 --- a/Src/Kernels/Uniform/FUnifKernel.hpp +++ b/Src/Kernels/Uniform/FUnifKernel.hpp @@ -60,6 +60,8 @@ class FUnifKernel /// Needed for M2L operator const M2LHandlerClass M2LHandler; + /// Leaf level separation criterion + const int LeafLevelSeparationCriterion; public: /** @@ -70,12 +72,15 @@ public: FUnifKernel(const int inTreeHeight, const FReal inBoxWidth, const FPoint<FReal>& inBoxCenter, - const MatrixKernelClass *const inMatrixKernel) + const MatrixKernelClass *const inMatrixKernel, + const int inLeafLevelSeparationCriterion = 1) : FAbstractUnifKernel< FReal, CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS>(inTreeHeight,inBoxWidth,inBoxCenter), MatrixKernel(inMatrixKernel), M2LHandler(MatrixKernel, inTreeHeight, - inBoxWidth) + inBoxWidth, + inLeafLevelSeparationCriterion), + LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion) { } @@ -186,7 +191,13 @@ public: ContainerClass* const NeighborSourceParticles[27], const int /* size */) { - 
DirectInteractionComputer<FReal,MatrixKernelClass::NCMP, NVALS>::P2P(TargetParticles,NeighborSourceParticles,MatrixKernel); + // Standard FMM separation criterion, i.e. max 27 neighbor clusters per leaf + if(LeafLevelSeparationCriterion==1) + DirectInteractionComputer<FReal,MatrixKernelClass::NCMP, NVALS>::P2P(TargetParticles,NeighborSourceParticles,MatrixKernel); + // Nearfield interactions are only computed within the target leaf + if(LeafLevelSeparationCriterion==0) + DirectInteractionComputer<FReal,MatrixKernelClass::NCMP, NVALS>::P2PRemote(TargetParticles,NeighborSourceParticles,0,MatrixKernel); + // If criterion equals -1 then no P2P need to be performed. } @@ -194,7 +205,13 @@ public: ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, ContainerClass* const inNeighbors[27], const int /*inSize*/) { - DirectInteractionComputer<FReal,MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,inNeighbors,27,MatrixKernel); + // Standard FMM separation criterion, i.e. max 27 neighbor clusters per leaf + if(LeafLevelSeparationCriterion==1) + DirectInteractionComputer<FReal,MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,inNeighbors,27,MatrixKernel); + // Nearfield interactions are only computed within the target leaf + if(LeafLevelSeparationCriterion==0) + DirectInteractionComputer<FReal,MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,inNeighbors,0,MatrixKernel); + // If criterion equals -1 then no P2P need to be performed. } }; diff --git a/Src/Kernels/Uniform/FUnifM2LHandler.hpp b/Src/Kernels/Uniform/FUnifM2LHandler.hpp index 92a2abfab88eaf2f01d1b3dad58a0bceee9c7fe7..0c38e1861998931213f0df9accd0c32022b07c83 100644 --- a/Src/Kernels/Uniform/FUnifM2LHandler.hpp +++ b/Src/Kernels/Uniform/FUnifM2LHandler.hpp @@ -38,14 +38,14 @@ /*! Precomputation of the 316 interactions by evaluation of the matrix kernel on the uniform grid and transformation into Fourier space. 
PB: Compute() does not belong to the M2LHandler like it does in the Chebyshev kernel. This allows much nicer specialization of the M2LHandler class with respect to the homogeneity of the kernel of interaction like in the ChebyshevSym kernel.*/ template < class FReal,int ORDER, typename MatrixKernelClass> -static void Compute(const MatrixKernelClass *const MatrixKernel, const FReal CellWidth, FComplex<FReal>* &FC) +static void Compute(const MatrixKernelClass *const MatrixKernel, const FReal CellWidth, FComplex<FReal>* &FC, const int SeparationCriterion = 1) { // allocate memory and store compressed M2L operators if (FC) throw std::runtime_error("M2L operators are already set"); // dimensions of operators const unsigned int order = ORDER; const unsigned int nnodes = TensorTraits<ORDER>::nnodes; - const unsigned int ninteractions = 316; + const unsigned int ninteractions = 316+26*(SeparationCriterion<1 ? 1 : 0) + 1*(SeparationCriterion<0 ? 1 : 0); typedef FUnifTensor<FReal,ORDER> TensorType; // interpolation points of source (Y) and target (X) cell @@ -53,7 +53,7 @@ static void Compute(const MatrixKernelClass *const MatrixKernel, const FReal Cel // set roots of target cell (X) TensorType::setRoots(FPoint<FReal>(0.,0.,0.), CellWidth, X); - // allocate memory and compute 316 m2l operators + // allocate memory and compute 316 m2l operators (342 if separation equals 0, 343 if separation equals -1) FReal *_C; FComplex<FReal> *_FC; @@ -78,7 +78,7 @@ static void Compute(const MatrixKernelClass *const MatrixKernel, const FReal Cel for (int i=-3; i<=3; ++i) { for (int j=-3; j<=3; ++j) { for (int k=-3; k<=3; ++k) { - if (abs(i)>1 || abs(j)>1 || abs(k)>1) { + if (abs(i)>SeparationCriterion || abs(j)>SeparationCriterion || abs(k)>SeparationCriterion) { // set roots of source cell (Y) const FPoint<FReal> cy(CellWidth*FReal(i), CellWidth*FReal(j), CellWidth*FReal(k)); FUnifTensor<FReal,order>::setRoots(cy, CellWidth, Y); @@ -110,7 +110,7 @@ static void Compute(const 
MatrixKernelClass *const MatrixKernel, const FReal Cel } } if (counter != ninteractions) - throw std::runtime_error("Number of interactions must correspond to 316"); + throw std::runtime_error("Number of interactions must correspond to " + std::to_string(ninteractions)); // Free _C delete [] _C; @@ -126,7 +126,7 @@ static void Compute(const MatrixKernelClass *const MatrixKernel, const FReal Cel for (int j=-3; j<=3; ++j) for (int k=-3; k<=3; ++k) { const unsigned int idx = (i+3)*7*7 + (j+3)*7 + (k+3); - if (abs(i)>1 || abs(j)>1 || abs(k)>1) { + if (abs(i)>SeparationCriterion || abs(j)>SeparationCriterion || abs(k)>SeparationCriterion) { FBlas::c_copy(opt_rc, reinterpret_cast<FReal*>(_FC + counter*rc), reinterpret_cast<FReal*>(FC + idx*opt_rc)); counter++; @@ -136,7 +136,7 @@ static void Compute(const MatrixKernelClass *const MatrixKernel, const FReal Cel } if (counter != ninteractions) - throw std::runtime_error("Number of interactions must correspond to 316"); + throw std::runtime_error("Number of interactions must correspond to " + std::to_string(ninteractions)); delete [] _FC; } @@ -182,6 +182,8 @@ class FUnifM2LHandler<FReal, ORDER,HOMOGENEOUS> DftClass Dft; const unsigned int opt_rc; // specific to real valued kernel + /// Leaf level separation criterion + const int LeafLevelSeparationCriterion; static const std::string getFileName() { @@ -195,8 +197,8 @@ class FUnifM2LHandler<FReal, ORDER,HOMOGENEOUS> public: template <typename MatrixKernelClass> - FUnifM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int, const FReal) - : FC(nullptr), Dft(), opt_rc(rc/2+1) + FUnifM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int, const FReal, const int inLeafLevelSeparationCriterion = 1) + : FC(nullptr), Dft(), opt_rc(rc/2+1), LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion) { // init DFT const int steps[dimfft] = {rc}; @@ -214,7 +216,7 @@ public: * Copy constructor */ FUnifM2LHandler(const FUnifM2LHandler& other) - : 
FC(other.FC), Dft(), opt_rc(other.opt_rc) + : FC(other.FC), Dft(), opt_rc(other.opt_rc), LeafLevelSeparationCriterion(other.LeafLevelSeparationCriterion) { // init DFT const int steps[dimfft] = {rc}; @@ -239,7 +241,7 @@ public: // Compute matrix of interactions const FReal ReferenceCellWidth = FReal(2.); FComplex<FReal>* pFC = NULL; - Compute<FReal,order>(MatrixKernel,ReferenceCellWidth,pFC); + Compute<FReal,order>(MatrixKernel,ReferenceCellWidth,pFC,LeafLevelSeparationCriterion); FC.assign(pFC); // Compute memory usage @@ -250,7 +252,10 @@ public: std::cout << "Compute and set M2L operators ("<< long(sizeM2L/**1e-6*/) <<" B) in " << time.tacAndElapsed() << "sec." << std::endl; } - + + unsigned long long getMemory() const { + return 343*opt_rc*sizeof(FComplex<FReal>); + } /** * Expands potentials \f$x+=IDFT(X)\f$ of a target cell. This operation can be @@ -341,6 +346,8 @@ class FUnifM2LHandler<FReal,ORDER,NON_HOMOGENEOUS> DftClass Dft; const unsigned int opt_rc; // specific to real valued kernel + /// Leaf level separation criterion + const int LeafLevelSeparationCriterion; static const std::string getFileName() { @@ -354,10 +361,10 @@ class FUnifM2LHandler<FReal,ORDER,NON_HOMOGENEOUS> public: template <typename MatrixKernelClass> - FUnifM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int inTreeHeight, const FReal inRootCellWidth) + FUnifM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int inTreeHeight, const FReal inRootCellWidth, const int inLeafLevelSeparationCriterion = 1) : TreeHeight(inTreeHeight), RootCellWidth(inRootCellWidth), - Dft(), opt_rc(rc/2+1) + Dft(), opt_rc(rc/2+1), LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion) { // init DFT const int steps[dimfft] = {rc}; @@ -383,7 +390,7 @@ public: : FC(other.FC), TreeHeight(other.TreeHeight), RootCellWidth(other.RootCellWidth), - Dft(), opt_rc(other.opt_rc) + Dft(), opt_rc(other.opt_rc), LeafLevelSeparationCriterion(other.LeafLevelSeparationCriterion) 
{ // init DFT const int steps[dimfft] = {rc}; @@ -414,9 +421,12 @@ public: CellWidth /= FReal(2.); // at level 2 for (unsigned int l=2; l<TreeHeight; ++l) { + // Determine separation criteria wrt level + const int SeparationCriterion = (l != TreeHeight-1 ? 1 : LeafLevelSeparationCriterion); + // check if already set if (FC[l]) throw std::runtime_error("M2L operator already set"); - Compute<FReal,order>(MatrixKernel,CellWidth,FC[l]); + Compute<FReal,order>(MatrixKernel,CellWidth,FC[l],SeparationCriterion); CellWidth /= FReal(2.); // at level l+1 } @@ -429,6 +439,9 @@ public: << time.tacAndElapsed() << "sec." << std::endl; } + unsigned long long getMemory() const { + return (TreeHeight-2)*343*opt_rc*sizeof(FComplex<FReal>); + } /** * Expands potentials \f$x+=IDFT(X)\f$ of a target cell. This operation can be diff --git a/Src/Kernels/Uniform/FUnifTensorialKernel.hpp b/Src/Kernels/Uniform/FUnifTensorialKernel.hpp index 3becc11741dc3d466f2ccd34644cb7ec694d322b..4ce8f472abc4c24385cbd8e4654a2a5b6822f8ee 100644 --- a/Src/Kernels/Uniform/FUnifTensorialKernel.hpp +++ b/Src/Kernels/Uniform/FUnifTensorialKernel.hpp @@ -84,6 +84,9 @@ protected://PB: for OptiDis /// Needed for M2L operator const M2LHandlerClass M2LHandler; + /// Leaf level separation criterion + const int LeafLevelSeparationCriterion; + public: /** * The constructor initializes all constant attributes and it reads the @@ -94,13 +97,16 @@ public: const FReal inBoxWidth, const FPoint<FReal>& inBoxCenter, const MatrixKernelClass *const inMatrixKernel, - const FReal inBoxWidthExtension) + const FReal inBoxWidthExtension, + const int inLeafLevelSeparationCriterion = 1) : FAbstractUnifKernel< FReal, CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS>(inTreeHeight,inBoxWidth,inBoxCenter,inBoxWidthExtension), MatrixKernel(inMatrixKernel), M2LHandler(MatrixKernel, inTreeHeight, inBoxWidth, - inBoxWidthExtension) + inBoxWidthExtension, + inLeafLevelSeparationCriterion), + 
LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion) { } diff --git a/Src/Kernels/Uniform/FUnifTensorialM2LHandler.hpp b/Src/Kernels/Uniform/FUnifTensorialM2LHandler.hpp index 65a8a138359274b624dadaa23643a665b80302fc..cf92d7b28ee1ad494630091d21848dfe68eb4ea0 100644 --- a/Src/Kernels/Uniform/FUnifTensorialM2LHandler.hpp +++ b/Src/Kernels/Uniform/FUnifTensorialM2LHandler.hpp @@ -42,12 +42,13 @@ template < class FReal, int ORDER, class MatrixKernelClass> static void Compute(const MatrixKernelClass *const MatrixKernel, const FReal CellWidth, const FReal CellWidthExtension, - FComplex<FReal>** &FC) + FComplex<FReal>** &FC, + const int SeparationCriterion = 1) { // dimensions of operators const unsigned int order = ORDER; const unsigned int nnodes = TensorTraits<ORDER>::nnodes; - const unsigned int ninteractions = 316; + const unsigned int ninteractions = 316+26*(SeparationCriterion<1 ? 1 : 0) + 1*(SeparationCriterion<0 ? 1 : 0); const unsigned int ncmp = MatrixKernelClass::NCMP; // utils @@ -90,11 +91,14 @@ static void Compute(const MatrixKernelClass *const MatrixKernel, unsigned int perm[rc]; TensorType::setStoragePermutation(perm); + // Allocate intermediate interaction block + FReal* block = new FReal[ncmp]; + unsigned int counter = 0; for (int i=-3; i<=3; ++i) { for (int j=-3; j<=3; ++j) { for (int k=-3; k<=3; ++k) { - if (abs(i)>1 || abs(j)>1 || abs(k)>1) { + if (abs(i)>SeparationCriterion || abs(j)>SeparationCriterion || abs(k)>SeparationCriterion) { // set roots of source cell (Y) const FPoint<FReal> cy(CellWidth*FReal(i), CellWidth*FReal(j), CellWidth*FReal(k)); FUnifTensor<FReal,order>::setRoots(cy, ExtendedCellWidth, Y); @@ -104,8 +108,6 @@ static void Compute(const MatrixKernelClass *const MatrixKernel, for(unsigned int m=0; m<2*order-1; ++m) for(unsigned int n=0; n<2*order-1; ++n){ // Compute current M2L interaction (block matrix) - FReal* block; - block = new FReal[ncmp]; MatrixKernel->evaluateBlock(X[node_ids_pairs[ido][0]], 
Y[node_ids_pairs[ido][1]], block); @@ -133,6 +135,9 @@ static void Compute(const MatrixKernelClass *const MatrixKernel, if (counter != ninteractions) throw std::runtime_error("Number of interactions must correspond to 316"); + // Free block + delete [] block; + // Free _C for (unsigned int d=0; d<ncmp; ++d) delete [] _C[d]; @@ -149,7 +154,7 @@ static void Compute(const MatrixKernelClass *const MatrixKernel, for (int j=-3; j<=3; ++j) for (int k=-3; k<=3; ++k) { const unsigned int idx = (i+3)*7*7 + (j+3)*7 + (k+3); - if (abs(i)>1 || abs(j)>1 || abs(k)>1) { + if (abs(i)>SeparationCriterion || abs(j)>SeparationCriterion || abs(k)>SeparationCriterion) { for (unsigned int d=0; d<ncmp; ++d) FBlas::c_copy(opt_rc, reinterpret_cast<FReal*>(_FC[d] + counter*rc), reinterpret_cast<FReal*>(FC[d] + idx*opt_rc)); @@ -207,6 +212,8 @@ class FUnifTensorialM2LHandler<FReal, ORDER,MatrixKernelClass,HOMOGENEOUS> DftClass Dft; const unsigned int opt_rc; // specific to real valued kernel + /// Leaf level separation criterion + const int LeafLevelSeparationCriterion; static const std::string getFileName() { @@ -219,9 +226,9 @@ class FUnifTensorialM2LHandler<FReal, ORDER,MatrixKernelClass,HOMOGENEOUS> public: - FUnifTensorialM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int, const FReal, const FReal inCellWidthExtension) + FUnifTensorialM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int, const FReal, const FReal inCellWidthExtension, const int inLeafLevelSeparationCriterion = 1) : CellWidthExtension(inCellWidthExtension), - Dft(), opt_rc(rc/2+1) + Dft(), opt_rc(rc/2+1), LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion) { // init DFT const int steps[dimfft] = {rc}; @@ -245,7 +252,7 @@ public: FUnifTensorialM2LHandler(const FUnifTensorialM2LHandler& other) : FC(other.FC), CellWidthExtension(other.CellWidthExtension), - Dft(), opt_rc(other.opt_rc) + Dft(), opt_rc(other.opt_rc), 
LeafLevelSeparationCriterion(other.LeafLevelSeparationCriterion) { // init DFT const int steps[dimfft] = {rc}; @@ -278,7 +285,7 @@ public: // but it NEEDS to match the numerator of the scale factor in matrix kernel! // Therefore box width extension is not yet supported for homog kernels const FReal ReferenceCellWidth = FReal(2.); - Compute<FReal,order>(MatrixKernel,ReferenceCellWidth, 0., FC); + Compute<FReal,order>(MatrixKernel,ReferenceCellWidth, 0., FC, LeafLevelSeparationCriterion); // Compute memory usage unsigned long sizeM2L = 343*ncmp*opt_rc*sizeof(FComplex<FReal>); @@ -288,6 +295,10 @@ public: << time.tacAndElapsed() << "sec." << std::endl; } + unsigned long long getMemory() const { + return 343*ncmp*opt_rc*sizeof(FComplex<FReal>); + } + /** * Expands potentials \f$x+=IDFT(X)\f$ of a target cell. This operation can be * seen as part of the L2L operation. @@ -396,6 +407,8 @@ class FUnifTensorialM2LHandler<FReal,ORDER,MatrixKernelClass,NON_HOMOGENEOUS> DftClass Dft; const unsigned int opt_rc; // specific to real valued kernel + /// Leaf level separation criterion + const int LeafLevelSeparationCriterion; static const std::string getFileName() { @@ -408,11 +421,11 @@ class FUnifTensorialM2LHandler<FReal,ORDER,MatrixKernelClass,NON_HOMOGENEOUS> public: - FUnifTensorialM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int inTreeHeight, const FReal inRootCellWidth, const FReal inCellWidthExtension) + FUnifTensorialM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int inTreeHeight, const FReal inRootCellWidth, const FReal inCellWidthExtension, const int inLeafLevelSeparationCriterion = 1) : TreeHeight(inTreeHeight), RootCellWidth(inRootCellWidth), CellWidthExtension(inCellWidthExtension), - Dft(), opt_rc(rc/2+1) + Dft(), opt_rc(rc/2+1), LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion) { // init DFT const int steps[dimfft] = {rc}; @@ -441,7 +454,7 @@ public: TreeHeight(other.TreeHeight), 
RootCellWidth(other.RootCellWidth), CellWidthExtension(other.CellWidthExtension), - Dft(), opt_rc(other.opt_rc) + Dft(), opt_rc(other.opt_rc), LeafLevelSeparationCriterion(other.LeafLevelSeparationCriterion) { // init DFT const int steps[dimfft] = {rc}; @@ -471,10 +484,12 @@ public: FReal CellWidth = RootCellWidth / FReal(2.); // at level 1 CellWidth /= FReal(2.); // at level 2 for (unsigned int l=2; l<TreeHeight; ++l) { + // Determine separation criteria wrt level + const int SeparationCriterion = (l != TreeHeight-1 ? 1 : LeafLevelSeparationCriterion); // check if already set for (unsigned int d=0; d<ncmp; ++d) if (FC[l][d]) throw std::runtime_error("M2L operator already set"); - Compute<FReal,order>(MatrixKernel,CellWidth,CellWidthExtension,FC[l]); + Compute<FReal,order>(MatrixKernel,CellWidth,CellWidthExtension,FC[l],SeparationCriterion); CellWidth /= FReal(2.); // at level l+1 } @@ -486,6 +501,10 @@ public: << time.tacAndElapsed() << "sec." << std::endl; } + unsigned long long getMemory() const { + return (TreeHeight-2)*343*ncmp*opt_rc*sizeof(FComplex<FReal>); + } + /** * Expands potentials \f$x+=IDFT(X)\f$ of a target cell. This operation can be * seen as part of the L2L operation. 
diff --git a/Src/Utils/FAlgorithmTimers.hpp b/Src/Utils/FAlgorithmTimers.hpp index 6e219bb75bd4aa4ee8e8183a6d033dc4c5dd7646..aca3ea956830e188d8956724cf20a21cf483de5b 100644 --- a/Src/Utils/FAlgorithmTimers.hpp +++ b/Src/Utils/FAlgorithmTimers.hpp @@ -45,7 +45,7 @@ public: Timers = new FTic[nbTimers]; } - ~FAlgorithmTimers(){ + virtual ~FAlgorithmTimers(){ delete[] Timers; } diff --git a/Src/Utils/FMemStats.h b/Src/Utils/FMemStats.h index fa5e4c14b6f6cfc9205878469c9f574e80ddba9d..828d7d249c9c61ef7b24db81fa66f36a80d97295 100644 --- a/Src/Utils/FMemStats.h +++ b/Src/Utils/FMemStats.h @@ -59,12 +59,20 @@ private: #ifdef SCALFMM_USE_MEM_STATS ~FMemStats(){ + plotState(); + } +#endif + + void plotState() const { +#ifdef SCALFMM_USE_MEM_STATS printf("[SCALFMM-MEMSTAT] Total number of allocations %lld \n", numberOfAllocations); printf("[SCALFMM-MEMSTAT] Memory used at the end %lu Bytes (%f MB)\n", FMemStats::controler.getCurrentAllocated(), FMemStats::controler.getCurrentAllocatedMB()); printf("[SCALFMM-MEMSTAT] Max memory used %lld Bytes (%f MB)\n", FMemStats::controler.getMaxAllocated(), FMemStats::controler.getMaxAllocatedMB()); printf("[SCALFMM-MEMSTAT] Total memory used %lld Bytes (%f MB)\n", FMemStats::controler.getTotalAllocated(), FMemStats::controler.getTotalAllocatedMB()); - } +#else + printf("[SCALFMM-MEMSTAT] unused\n"); #endif + } void allocate(const std::size_t size){ numberOfAllocations += 1; diff --git a/Src/Utils/FParameterNames.hpp b/Src/Utils/FParameterNames.hpp index 2825a4bcf828a4638b8622095758f91dd3b658b9..6fcfcf4741994f4624adb53a1b170a2a272a1a2b 100644 --- a/Src/Utils/FParameterNames.hpp +++ b/Src/Utils/FParameterNames.hpp @@ -182,6 +182,11 @@ static const FParameterNames PhysicalValue = { "The physical value of the particles." }; +static const FParameterNames SeparationCriterion = { + {"-sep", "--separation-criterion"} , + "Specify number of clusters separing 2 well-separated clusters." 
+}; + /** To print a list of parameters */ inline void PrintUsedOptions(const std::vector<FParameterNames>& options){ std::cout << ">> Here is the list of the parameters you can pass to this application :\n"; diff --git a/Tests/Kernels/testSmoothUnifAlgorithm.cpp b/Tests/Kernels/testSmoothUnifAlgorithm.cpp new file mode 100644 index 0000000000000000000000000000000000000000..72dee1005bab053e4ffc5b121fd3e3260a26bdcd --- /dev/null +++ b/Tests/Kernels/testSmoothUnifAlgorithm.cpp @@ -0,0 +1,237 @@ +// =================================================================================== +// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Berenger Bramas, Matthias Messner +// olivier.coulaud@inria.fr, berenger.bramas@inria.fr +// This software is a computer program whose purpose is to compute the FMM. +// +// This software is governed by the CeCILL-C and LGPL licenses and +// abiding by the rules of distribution of free software. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public and CeCILL-C Licenses for more details. +// "http://www.cecill.info". +// "http://www.gnu.org/licenses". 
+// =================================================================================== + + +/** + *@author Pierre Blanchard + * + * **/ +// ==== CMAKE ===== +// @FUSE_FFT +// ================ +// Keep in private GIT +// @SCALFMM_PRIVATE + + +#include <iostream> + +#include <cstdio> +#include <cstdlib> + +#include "Files/FFmaGenericLoader.hpp" + + +#include "Kernels/Uniform/FUnifCell.hpp" +#include "Kernels/Interpolation/FInterpMatrixKernel.hpp" +#include "Kernels/Uniform/FUnifKernel.hpp" + +#include "Components/FSimpleLeaf.hpp" +#include "Kernels/P2P/FP2PParticleContainerIndexed.hpp" + +#include "Utils/FParameters.hpp" +#include "Utils/FMemUtils.hpp" + +#include "Containers/FOctree.hpp" +#include "Containers/FVector.hpp" + +#include "Core/FFmmAlgorithm.hpp" +#include "Core/FFmmAlgorithmThread.hpp" + +#include "../../Src/Utils/FParameterNames.hpp" + +/** + * This program runs the FMM Algorithm with the Uniform kernel + * and a separation criterion of -1 (i.e. no nearfield and only farfield interaction) + * and compares the results with a direct computation. + * The matrix kernel has to be smooth at the origin, e.g. 1/r^2. 
+ */ + +// Simply create particles and try the kernels +int main(int argc, char* argv[]) +{ + FHelpDescribeAndExit(argc, argv, + "Test Uniform kernel and compare it with the direct computation.", + FParameterDefinitions::OctreeHeight,FParameterDefinitions::NbThreads, + FParameterDefinitions::OctreeSubHeight, FParameterDefinitions::InputFile); + + typedef double FReal; + const char* const filename = FParameters::getStr(argc,argv,FParameterDefinitions::InputFile.options, "../Data/test20k.fma"); + const unsigned int TreeHeight = FParameters::getValue(argc, argv, FParameterDefinitions::OctreeHeight.options, 3); + const unsigned int SubTreeHeight = FParameters::getValue(argc, argv, FParameterDefinitions::OctreeSubHeight.options, 2); + const unsigned int NbThreads = FParameters::getValue(argc, argv, FParameterDefinitions::NbThreads.options, 1); + +#ifdef _OPENMP + omp_set_num_threads(NbThreads); + std::cout << "\n>> Using " << omp_get_max_threads() << " threads.\n" << std::endl; +#else + std::cout << "\n>> Sequential version.\n" << std::endl; +#endif + + // init timer + FTic time; + + // interaction kernel evaluator +// typedef FInterpMatrixKernelR<FReal> MatrixKernelClass; + typedef FInterpMatrixKernelAPLUSRR<FReal> MatrixKernelClass; + const MatrixKernelClass MatrixKernel; + + // init particles position and physical value + struct TestParticle{ + FPoint<FReal> position; + FReal forces[3]; + FReal physicalValue; + FReal potential; + }; + + // open particle file + FFmaGenericLoader<FReal> loader(filename); + if(!loader.isOpen()) throw std::runtime_error("Particle file couldn't be opened!"); + + TestParticle* const particles = new TestParticle[loader.getNumberOfParticles()]; + for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ + FPoint<FReal> position; + FReal physicalValue = 0.0; + loader.fillParticle(&position,&physicalValue); + // get copy + particles[idxPart].position = position; + particles[idxPart].physicalValue = physicalValue; + 
particles[idxPart].potential = 0.0; + particles[idxPart].forces[0] = 0.0; + particles[idxPart].forces[1] = 0.0; + particles[idxPart].forces[2] = 0.0; + } + + //////////////////////////////////////////////////////////////////// + + { // begin direct computation + std::cout << "\nDirect computation ... " << std::endl; + time.tic(); + { + for(FSize idxTarget = 0 ; idxTarget < loader.getNumberOfParticles() ; ++idxTarget){ + for(FSize idxOther = idxTarget + 1 ; idxOther < loader.getNumberOfParticles() ; ++idxOther){ + FP2P::MutualParticles(particles[idxTarget].position.getX(), particles[idxTarget].position.getY(), + particles[idxTarget].position.getZ(), particles[idxTarget].physicalValue, + &particles[idxTarget].forces[0], &particles[idxTarget].forces[1], + &particles[idxTarget].forces[2], &particles[idxTarget].potential, + particles[idxOther].position.getX(), particles[idxOther].position.getY(), + particles[idxOther].position.getZ(), particles[idxOther].physicalValue, + &particles[idxOther].forces[0], &particles[idxOther].forces[1], + &particles[idxOther].forces[2], &particles[idxOther].potential, + &MatrixKernel); + } + } + } + time.tac(); + std::cout << "Done " << "(@Direct computation = " + << time.elapsed() << "s)." 
<< std::endl; + + } // end direct computation + + //////////////////////////////////////////////////////////////////// + + { // begin Lagrange kernel + + // accuracy + const unsigned int ORDER = 5 ; + + // typedefs + typedef FP2PParticleContainerIndexed<FReal> ContainerClass; + typedef FSimpleLeaf<FReal, ContainerClass > LeafClass; + typedef FUnifCell<FReal,ORDER> CellClass; + typedef FOctree<FReal, CellClass,ContainerClass,LeafClass> OctreeClass; + typedef FUnifKernel<FReal,CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass; + typedef FFmmAlgorithm<OctreeClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass; + // typedef FFmmAlgorithmThread<OctreeClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass; + + // init oct-tree + OctreeClass tree(TreeHeight, SubTreeHeight, loader.getBoxWidth(), loader.getCenterOfBox()); + + // Separation criterion for the leaf clusters + const int LeafLevelSeparationCriterion = FParameters::getValue(argc, argv, FParameterDefinitions::SeparationCriterion.options, 1); + + + { // ----------------------------------------------------- + std::cout << "Creating & Inserting " << loader.getNumberOfParticles() + << " particles ..." << std::endl; + std::cout << "\tHeight : " << TreeHeight << " \t sub-height : " << SubTreeHeight << std::endl; + time.tic(); + + for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ + // put in tree + tree.insert(particles[idxPart].position, idxPart, particles[idxPart].physicalValue); + } + + time.tac(); + std::cout << "Done " << "(@Creating and Inserting Particles = " + << time.elapsed() << "s)." << std::endl; + } // ----------------------------------------------------- + + { // ----------------------------------------------------- + std::cout << "\nLagrange/Uniform grid FMM (ORDER="<< ORDER << ") ... 
" << std::endl; + time.tic(); + KernelClass* kernels = new KernelClass(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(),&MatrixKernel, LeafLevelSeparationCriterion); + FmmClass algorithm(&tree, kernels, LeafLevelSeparationCriterion); + algorithm.execute(); + time.tac(); + std::cout << "Done " << "(@Algorithm = " << time.elapsed() << "s)." << std::endl; + } // ----------------------------------------------------- + + + { // ----------------------------------------------------- + std::cout << "\nError computation ... " << std::endl; + FMath::FAccurater<FReal> potentialDiff; + FMath::FAccurater<FReal> fx, fy, fz; + + FReal checkPotential[20000]; + + { // Check that each particle has been summed with all other + + tree.forEachLeaf([&](LeafClass* leaf){ + const FReal*const potentials = leaf->getTargets()->getPotentials(); + const FReal*const forcesX = leaf->getTargets()->getForcesX(); + const FReal*const forcesY = leaf->getTargets()->getForcesY(); + const FReal*const forcesZ = leaf->getTargets()->getForcesZ(); + const FSize nbParticlesInLeaf = leaf->getTargets()->getNbParticles(); + const FVector<FSize>& indexes = leaf->getTargets()->getIndexes(); + + for(FSize idxPart = 0 ; idxPart < nbParticlesInLeaf ; ++idxPart){ + const FSize indexPartOrig = indexes[idxPart]; + //PB: store potential in nbParticles array + checkPotential[indexPartOrig]=potentials[idxPart]; + + potentialDiff.add(particles[indexPartOrig].potential,potentials[idxPart]); + fx.add(particles[indexPartOrig].forces[0],forcesX[idxPart]); + fy.add(particles[indexPartOrig].forces[1],forcesY[idxPart]); + fz.add(particles[indexPartOrig].forces[2],forcesZ[idxPart]); + } + }); + } + + // Print for information + std::cout << "Potential " << potentialDiff << std::endl; + std::cout << "Fx " << fx << std::endl; + std::cout << "Fy " << fy << std::endl; + std::cout << "Fz " << fz << std::endl; + } // ----------------------------------------------------- + + } // end Lagrange kernel + + std::cout << "Memory used 
at the end " << FMemStats::controler.getCurrentAllocated() << " Bytes (" << FMemStats::controler.getCurrentAllocatedMB() << "MB)\n"; + std::cout << "Max memory used " << FMemStats::controler.getMaxAllocated() << " Bytes (" << FMemStats::controler.getMaxAllocatedMB() << "MB)\n"; + std::cout << "Total memory used " << FMemStats::controler.getTotalAllocated() << " Bytes (" << FMemStats::controler.getTotalAllocatedMB() << "MB)\n"; + + return 0; +} diff --git a/Tests/noDist/AlgoLoaderCostZones.hpp b/Tests/noDist/AlgoLoaderCostZones.hpp new file mode 100644 index 0000000000000000000000000000000000000000..a633f3ccdd0695e8beeed9f6acd11a91c3542f3b --- /dev/null +++ b/Tests/noDist/AlgoLoaderCostZones.hpp @@ -0,0 +1,82 @@ +#ifndef _ALGOLOADERCOSTZONES_HPP_ +#define _ALGOLOADERCOSTZONES_HPP_ + +#include "PerfTestUtils.hpp" + +#include "Core/FFmmAlgorithm.hpp" + +#include "BalanceTree/FFmmAlgorithmThreadBalanced.hpp" +#include "BalanceTree/FCostCell.hpp" +#include "BalanceTree/FCostZones.hpp" + +/** + * \brief Algorithm loader for the CostZones algorithm. 
+ * + * \warning : This loader requires that the KernelLoader supply a type definition + * for a `CostKernelClass` + */ +template <class _TreeLoader, template<typename> class _KernelLoader> +class AlgoLoaderCostZones : public FAlgoLoader<_TreeLoader, _KernelLoader> { +public: + using TreeLoader = _TreeLoader; + using KernelLoader = _KernelLoader<TreeLoader>; + + using FReal = typename TreeLoader::FReal; + using CellClass = typename TreeLoader::CellClass; + using ContainerClass = typename TreeLoader::ContainerClass; + using LeafClass = typename TreeLoader::LeafClass; + using OctreeClass = typename TreeLoader::OctreeClass; + using KernelClass = typename KernelLoader::KernelClass; + using CostKernelClass= typename KernelLoader::CostKernelClass; + + static_assert(std::is_base_of<FCostCellTypeTrait, CellClass>::value, + "The tree cells must derive from FCostCell."); + + using FMMClass = FFmmAlgorithmThreadBalanced + <OctreeClass, CellClass, ContainerClass, KernelClass, LeafClass>; + using CostFmmClass = FFmmAlgorithm + <OctreeClass, CellClass, ContainerClass, CostKernelClass, LeafClass>; + + TreeLoader& _treeLoader; + KernelLoader& _kernelLoader; + + /// Builds the loader + AlgoLoaderCostZones(FPerfTestParams& /*params*/, + TreeLoader& treeLoader, + KernelLoader& kernelLoader) : + _treeLoader(treeLoader), + _kernelLoader(kernelLoader) { + + } + + /// Computes the tree cells costs then runs the costzones and FMM algorithms. 
+ void run() { + + OctreeClass* p_tree = &(_treeLoader._tree); + + // Compute tree cells costs + CostFmmClass costAlgo(p_tree, &(_kernelLoader._costKernel)); + + this->time.tic(); + costAlgo.execute(); + this->time.tac(); + std::cout << "Generating tree cost: " << this->time.elapsed() << "s.\n"; + + FCostZones<OctreeClass, CellClass> costzones(p_tree, omp_get_max_threads()); + + this->time.tic(); + costzones.run(); + this->time.tac(); + std::cout << "Generating cost zones: " << this->time.elapsed() << "s.\n"; + + + this->time.tic(); + FMMClass algo(p_tree, &(_kernelLoader._kernel), costzones.getZoneBounds(), costzones.getLeafZoneBounds()); + algo.execute(); + this->time.tac(); + } +}; + + + +#endif diff --git a/Tests/noDist/AlgoLoaderTask.hpp b/Tests/noDist/AlgoLoaderTask.hpp new file mode 100644 index 0000000000000000000000000000000000000000..42d9dd9ec90cff5d38ef6a75404141870b84bdc3 --- /dev/null +++ b/Tests/noDist/AlgoLoaderTask.hpp @@ -0,0 +1,45 @@ +#ifndef _ALGOLOADERTASK_HPP_ +#define _ALGOLOADERTASK_HPP_ + +#include "PerfTestUtils.hpp" + +#include "Core/FFmmAlgorithmTask.hpp" + + +template <class _TreeLoader, template<typename> class _KernelLoader> +class AlgoLoaderTask : public FAlgoLoader<_TreeLoader, _KernelLoader> { +public: + using TreeLoader = _TreeLoader; + using KernelLoader = _KernelLoader<TreeLoader>; + + + using FReal = typename TreeLoader::FReal; + using CellClass = typename TreeLoader::CellClass; + using ContainerClass = typename TreeLoader::ContainerClass; + using LeafClass = typename TreeLoader::LeafClass; + using OctreeClass = typename TreeLoader::OctreeClass; + using KernelClass = typename KernelLoader::KernelClass; + + using FMMClass = FFmmAlgorithmTask<OctreeClass, CellClass, ContainerClass, KernelClass, LeafClass>; + + TreeLoader& _treeLoader; + KernelLoader& _kernelLoader; + + AlgoLoaderTask(FPerfTestParams& /*params*/, + TreeLoader& treeLoader, + KernelLoader& kernelLoader) : + _treeLoader(treeLoader), + _kernelLoader(kernelLoader) { + + 
} + + + void run() { + FMMClass algo(&(_treeLoader._tree), &(_kernelLoader._kernel)); + algo.execute(); + } +}; + + + +#endif diff --git a/Tests/noDist/AlgoLoaderThread.hpp b/Tests/noDist/AlgoLoaderThread.hpp new file mode 100644 index 0000000000000000000000000000000000000000..1e68345211c0f9d6c59af3747b920e9859f0a941 --- /dev/null +++ b/Tests/noDist/AlgoLoaderThread.hpp @@ -0,0 +1,46 @@ +#ifndef _ALGOLOADERTHREAD_HPP_ +#define _ALGOLOADERTHREAD_HPP_ + +#include "PerfTestUtils.hpp" + +#include "Core/FFmmAlgorithmThread.hpp" + + +template <class _TreeLoader, template<typename> class _KernelLoader> +class AlgoLoaderThread : public FAlgoLoader<_TreeLoader, _KernelLoader> { +public: + using TreeLoader = _TreeLoader; + using KernelLoader = _KernelLoader<TreeLoader>; + + using FReal = typename TreeLoader::FReal; + using CellClass = typename TreeLoader::CellClass; + using ContainerClass = typename TreeLoader::ContainerClass; + using LeafClass = typename TreeLoader::LeafClass; + using OctreeClass = typename TreeLoader::OctreeClass; + using KernelClass = typename KernelLoader::KernelClass; + + using FMMClass = FFmmAlgorithmThread<OctreeClass, CellClass, ContainerClass, KernelClass, LeafClass>; + + TreeLoader& _treeLoader; + KernelLoader& _kernelLoader; + + bool _omp_static_schedule; + + AlgoLoaderThread(FPerfTestParams& params, + TreeLoader& treeLoader, + KernelLoader& kernelLoader) : + _treeLoader(treeLoader), + _kernelLoader(kernelLoader), + _omp_static_schedule(params.omp_static_schedule) { + + } + + + void run() { + FMMClass algo(&(_treeLoader._tree), &(_kernelLoader._kernel), + _omp_static_schedule); + algo.execute(); + } +}; + +#endif diff --git a/Tests/noDist/BalancePerfTest.cpp b/Tests/noDist/BalancePerfTest.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ebf7dcf05e3379ed370e4ae2648dbd2c2b9d2df7 --- /dev/null +++ b/Tests/noDist/BalancePerfTest.cpp @@ -0,0 +1,416 @@ +// 
=================================================================================== +// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Berenger Bramas +// olivier.coulaud@inria.fr, berenger.bramas@inria.fr +// This software is a computer program whose purpose is to compute the FMM. +// +// This software is governed by the CeCILL-C and LGPL licenses and +// abiding by the rules of distribution of free software. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public and CeCILL-C Licenses for more details. +// "http://www.cecill.info". +// "http://www.gnu.org/licenses". +// =================================================================================== + +// ==== CMAKE ==== +// Keep in private GIT +// @SCALFMM_PRIVATE + +// ==== CMAKE ===== +// @FUSE_BLAS +// ================ + + +/** + * \file + * \authors B. Bramas, O. Coulaud + * \brief This program runs the balanced FMM Algorithm with the interpolation + * kernel based on Chebyshev interpolation (1/r kernel) + * + * This program runs the FMM Algorithm with the Chebyshev kernel and compares + * the results with a direct computation. 
+ * + * + * This code is a short example to use the Chebyshev Interpolation approach for the 1/r kernel + */ + + +#include <iostream> +#include <string> + +// Utilities +#include "ScalFmmConfig.h" +#include "Files/FFmaGenericLoader.hpp" +#include "Utils/FParameters.hpp" +#include "Utils/FParameterNames.hpp" + +// Data structures +#include "Kernels/Chebyshev/FChebCell.hpp" +#include "Containers/FOctree.hpp" +#include "Components/FSimpleLeaf.hpp" +#include "Kernels/P2P/FP2PParticleContainerIndexed.hpp" + +// Kernels +#include "Kernels/Interpolation/FInterpMatrixKernel.hpp" +#include "Kernels/Chebyshev/FChebSymKernel.hpp" +#include "BalanceTree/FChebSymCostKernel.hpp" + +// Algorithms +#include "Core/FFmmAlgorithm.hpp" +#include "Core/FFmmAlgorithmThread.hpp" +#include "Core/FFmmAlgorithmTask.hpp" +#include "BalanceTree/FFmmAlgorithmThreadBalanced.hpp" +#include "BalanceTree/FCostZones.hpp" + + +// typedefs +using FReal = double; + +// Chebyshev accuracy +const unsigned int ORDER = 7; + + +class AbstractPerfTest { +protected: + FTic time; + + template <typename... Args> + struct false_type { + bool value = false; + }; + + template <typename...Args> + void loadTree(Args...) { + static_assert(false_type<Args...>::value, + "I don't know how to load this tree with this loader..."); + } + + template <class OctreeClass> + void loadTree(FFmaGenericLoader<FReal>& loader, OctreeClass& tree) { + std::cout << "Creating & inserting particles" << std::flush; + + time.tic(); + + FPoint<FReal> position; + FReal physicalValue = 0.0; + for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart) { + // Read particle per particle from file + loader.fillParticle(&position,&physicalValue); + // put particle in octree + tree.insert(position, idxPart, physicalValue); + } + + time.tac(); + std::cout << "Done (" << time.elapsed() << "s)." 
<< std::endl; + } + + virtual void setup() = 0; + virtual void runAlgo() = 0; + virtual void finalize() = 0; + + template <class LeafClass, class OctreeClass, class FmmClass, class LoaderClass> + void finalize(OctreeClass& tree, FmmClass& algo, LoaderClass& loader) { + std::cout << "Timers Far Field \n" + << "P2M " << algo.getTime(FAlgorithmTimers::P2MTimer) << " seconds\n" + << "M2M " << algo.getTime(FAlgorithmTimers::M2MTimer) << " seconds\n" + << "M2L " << algo.getTime(FAlgorithmTimers::M2LTimer) << " seconds\n" + << "L2L " << algo.getTime(FAlgorithmTimers::L2LTimer) << " seconds\n" + << "P2P and L2P " << algo.getTime(FAlgorithmTimers::NearTimer) << " seconds\n" + << std::endl; + + std::cout << "Done " << "(@Algorithm = " << time.elapsed() << " s) ." << std::endl; + + FSize N1 = 0, N2 = loader.getNumberOfParticles()/2, N3 = loader.getNumberOfParticles() - 1; + FReal energy = 0.0; + // + // Loop over all leaves + // + std::cout << std::endl; + std::cout << std::scientific; + std::cout.precision(10) ; + + tree.forEachLeaf([&](LeafClass* leaf){ + const FReal*const posX = leaf->getTargets()->getPositions()[0]; + const FReal*const posY = leaf->getTargets()->getPositions()[1]; + const FReal*const posZ = leaf->getTargets()->getPositions()[2]; + + const FReal*const potentials = leaf->getTargets()->getPotentials(); + const FReal*const forcesX = leaf->getTargets()->getForcesX(); + const FReal*const forcesY = leaf->getTargets()->getForcesY(); + const FReal*const forcesZ = leaf->getTargets()->getForcesZ(); + const FSize nbParticlesInLeaf = leaf->getTargets()->getNbParticles(); + const FReal*const physicalValues = leaf->getTargets()->getPhysicalValues(); + + const FVector<FSize>& indexes = leaf->getTargets()->getIndexes(); + + for(FSize idxPart = 0 ; idxPart < nbParticlesInLeaf ; ++idxPart){ + const FSize indexPartOrig = indexes[idxPart]; + if ((indexPartOrig == N1) || (indexPartOrig == N2) || (indexPartOrig == N3) ) { + std::cout << "Index "<< indexPartOrig <<" potential " 
<< potentials[idxPart] + << " Pos "<<posX[idxPart]<<" "<<posY[idxPart]<<" "<<posZ[idxPart] + << " Forces: " << forcesX[idxPart] << " " << forcesY[idxPart] << " "<< forcesZ[idxPart] <<std::endl; + } + energy += potentials[idxPart]*physicalValues[idxPart] ; + } + }); + std::cout <<std::endl<<"Energy: "<< energy<<std::endl; + } + +public: + virtual ~AbstractPerfTest(){}; + + void run() { + this->setup(); + this->runAlgo(); + this->finalize(); + } + +}; + +template < template<typename...> class Algo > class PerfTest; + +template <> +class PerfTest<FFmmAlgorithmThread> : public AbstractPerfTest { +public: // typedefs + using CellClass = FChebCell<FReal, ORDER>; + using ContainerClass = FP2PParticleContainerIndexed<FReal>; + using LeafClass = FSimpleLeaf<FReal, ContainerClass >; + using OctreeClass = FOctree<FReal, CellClass, ContainerClass, LeafClass>; + using MatrixKernelClass = FInterpMatrixKernelR<FReal>; + using KernelClass = FChebSymKernel <FReal, CellClass, ContainerClass, + MatrixKernelClass, ORDER>; + + using FmmClass = FFmmAlgorithmThread<OctreeClass, CellClass, ContainerClass, KernelClass, LeafClass>; + +protected: + int _nbThreads; + FFmaGenericLoader<FReal> _loader; + OctreeClass _tree; + FmmClass* _algo; + + bool _ompStaticScheduling; + +public: + PerfTest(const std::string& fileName, const int nbThreads, const int treeHeight, const int subTreeHeight, bool ompStaticScheduling) : + _nbThreads(nbThreads) , + _loader(fileName), + _tree(treeHeight, subTreeHeight,_loader.getBoxWidth(),_loader.getCenterOfBox()), + _ompStaticScheduling(ompStaticScheduling) { + } + + ~PerfTest() { + if(_algo != nullptr) + delete _algo; + } + +protected: + virtual void setup() { + omp_set_num_threads(_nbThreads); + std::cout << "\n>> Using " << omp_get_max_threads() << " threads.\n" << std::endl; + + loadTree(_loader,_tree); + } + + virtual void runAlgo() { + time.tic(); + const MatrixKernelClass MatrixKernel; + KernelClass kernels(_tree.getHeight(), _loader.getBoxWidth(), 
_loader.getCenterOfBox(),&MatrixKernel); + _algo = new FmmClass(&_tree, &kernels,_ompStaticScheduling); + + _algo->execute(); + time.tac(); + } + + void finalize() { + AbstractPerfTest::finalize<LeafClass>(_tree, *_algo, _loader); + } +}; + +template <> +class PerfTest<FFmmAlgorithmTask> : public AbstractPerfTest { +public: // typedefs + using CellClass = FChebCell<FReal, ORDER>; + using ContainerClass = FP2PParticleContainerIndexed<FReal>; + using LeafClass = FSimpleLeaf<FReal, ContainerClass >; + using OctreeClass = FOctree<FReal, CellClass, ContainerClass, LeafClass>; + using MatrixKernelClass = FInterpMatrixKernelR<FReal>; + using KernelClass = FChebSymKernel <FReal, CellClass, ContainerClass, + MatrixKernelClass, ORDER>; + + using FmmClass = FFmmAlgorithmTask<OctreeClass, CellClass, ContainerClass, KernelClass, LeafClass>; + +protected: + int _nbThreads; + FFmaGenericLoader<FReal> _loader; + OctreeClass _tree; + FmmClass* _algo; + +public: + PerfTest(const std::string& fileName, const int nbThreads, const int treeHeight, const int subTreeHeight) : + _nbThreads(nbThreads) , + _loader(fileName), + _tree(treeHeight, subTreeHeight, _loader.getBoxWidth(), _loader.getCenterOfBox()) { + } + + ~PerfTest() { + if(_algo != nullptr) + delete _algo; + } + +protected: + virtual void setup() { + omp_set_num_threads(_nbThreads); + std::cout << "\n>> Using " << omp_get_max_threads() << " threads.\n" << std::endl; + + loadTree(_loader,_tree); + } + + virtual void runAlgo() { + time.tic(); + const MatrixKernelClass MatrixKernel; + KernelClass kernels(_tree.getHeight(), _loader.getBoxWidth(), _loader.getCenterOfBox(),&MatrixKernel); + _algo = new FmmClass(&_tree, &kernels); + + _algo->execute(); + time.tac(); + } + + void finalize() { + AbstractPerfTest::finalize<LeafClass>(_tree, *_algo, _loader); + } +}; + + + +template <> +class PerfTest<FFmmAlgorithmThreadBalanced> : public AbstractPerfTest { +public: // typedefs + using ContainerClass = FP2PParticleContainerIndexed<FReal>; 
+ using CellClass = FCostCell <FChebCell<FReal, ORDER>>; + using LeafClass = FSimpleLeaf<FReal, ContainerClass >; + using OctreeClass = FOctree <FReal, CellClass, ContainerClass, LeafClass>; + using MatrixKernelClass = FInterpMatrixKernelR<FReal>; + using KernelClass = FChebSymKernel <FReal, CellClass, ContainerClass, + MatrixKernelClass, ORDER>; + using CostKernelClass = FChebSymCostKernel <FReal, CellClass, ContainerClass, + MatrixKernelClass, ORDER, OctreeClass>; + + template <template <typename...> class T, typename Kernel> + using FmmAlgoClass = T<OctreeClass, CellClass, ContainerClass, Kernel, LeafClass>; + + using FmmClass = FmmAlgoClass<FFmmAlgorithmThreadBalanced, KernelClass>; + using CostFmmClass = FmmAlgoClass<FFmmAlgorithm, CostKernelClass>; + + const FReal epsilon = 1e-4; + +protected: + + int _nbThreads; + FFmaGenericLoader<FReal> _loader; + OctreeClass _tree; + FmmClass* _algo; + +public: + PerfTest<FFmmAlgorithmThreadBalanced>( + const std::string& fileName, const int nbThreads, + const int treeHeight, const int subTreeHeight) : + _nbThreads(nbThreads) , + _loader(fileName), + _tree(treeHeight, subTreeHeight, _loader.getBoxWidth(), _loader.getCenterOfBox()) { + } + + ~PerfTest<FFmmAlgorithmThreadBalanced>() { + if(_algo != nullptr) + delete _algo; + } + +protected: + virtual void setup() { + omp_set_num_threads(_nbThreads); + std::cout << "\n>> Using " << omp_get_max_threads() << " threads.\n" << std::endl; + + loadTree(_loader,_tree); + } + + virtual void runAlgo() { + // Compute tree cells costs + CostKernelClass balanceKernel(&_tree, epsilon); + CostFmmClass costAlgo(&_tree, &balanceKernel); + + time.tic(); + costAlgo.execute(); + time.tac(); + std::cout << "Generating tree cost: " << time.elapsed() << "s.\n"; + + FCostZones<OctreeClass, CellClass> costzones(&_tree, omp_get_max_threads()); + + time.tic(); + costzones.run(); + time.tac(); + std::cout << "Generating cost zones: " << time.elapsed() << "s.\n"; + + time.tic(); + const 
MatrixKernelClass MatrixKernel; + KernelClass kernels(_tree.getHeight(), _loader.getBoxWidth(), _loader.getCenterOfBox(),&MatrixKernel); + _algo = new FmmClass(&_tree, &kernels, costzones.getZoneBounds(), costzones.getLeafZoneBounds()); + + _algo->execute(); + time.tac(); + } + + void finalize() { + AbstractPerfTest::finalize<LeafClass>(_tree, *_algo, _loader); + } +}; + + + +// Simply create particles and try the kernels +int main(int argc, char* argv[]) +{ + FHelpDescribeAndExit(argc, argv, + "Driver for Chebyshev interpolation kernel (1/r kernel).", + FParameterDefinitions::InputFile, + FParameterDefinitions::OctreeHeight, + FParameterDefinitions::OctreeSubHeight, + FParameterDefinitions::NbThreads, + {{"--algo"},"Algorithm to run (costzones, basic-static, basic-dynamic, task)"}); + + const std::string defaultFile("../Data/unitCubeXYZQ100.bfma" ); + const std::string filename = + FParameters::getStr(argc,argv,FParameterDefinitions::InputFile.options, defaultFile.c_str()); + const unsigned int TreeHeight = + FParameters::getValue(argc, argv, FParameterDefinitions::OctreeHeight.options, 5); + const unsigned int SubTreeHeight = + FParameters::getValue(argc, argv, FParameterDefinitions::OctreeSubHeight.options, 2); + const unsigned int NbThreads = + FParameters::getValue(argc, argv, FParameterDefinitions::NbThreads.options, 1); + const std::string algoChoice = + FParameters::getStr(argc,argv,{"--algo"},"costzones"); + + std::cout << "file: " << filename << " tree height: " << TreeHeight + << "(" << SubTreeHeight << ") algo: " << algoChoice << std::endl; + + if(algoChoice == "costzones") { + PerfTest<FFmmAlgorithmThreadBalanced> + balancePerfTest(filename, NbThreads, TreeHeight, SubTreeHeight); + balancePerfTest.run(); + } else if (algoChoice == "basic-static") { + PerfTest<FFmmAlgorithmThread> + threadPerfTestStatic(filename, NbThreads, TreeHeight, SubTreeHeight, true); + threadPerfTestStatic.run(); + } else if (algoChoice == "basic-dynamic") { + 
PerfTest<FFmmAlgorithmThread> + threadPerfTestDynamic(filename, NbThreads, TreeHeight, SubTreeHeight,false); + threadPerfTestDynamic.run(); + } else if (algoChoice == "task") { + PerfTest<FFmmAlgorithmTask> + taskPerfTest(filename, NbThreads, TreeHeight, SubTreeHeight); + taskPerfTest.run(); + } else { + std::cerr << "Wrong algorithm choice. Try 'basic' or 'costzones'." << std::endl; + } + +} diff --git a/Tests/noDist/KernelLoaderFChebSym.hpp b/Tests/noDist/KernelLoaderFChebSym.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e1cfd6e0e2f70a8e85a59efa46b6231061237016 --- /dev/null +++ b/Tests/noDist/KernelLoaderFChebSym.hpp @@ -0,0 +1,67 @@ +#ifndef _KERNELLOADERFCHEBSYM_HPP_ +#define _KERNELLOADERFCHEBSYM_HPP_ + +#include "PerfTestUtils.hpp" + +#include "Kernels/Interpolation/FInterpMatrixKernel.hpp" +#include "Kernels/Chebyshev/FChebSymKernel.hpp" + +#include "BalanceTree/FChebSymCostKernel.hpp" + +/** + * \brief Kernel loader for the symmetric Chebyshev kernel. + * + * \warning This loader requires that TreeLoader::CellClass inherits from + * FChebCell. + * + * \note This loader also provides the typedef CostKernelClass and a member + * _costKernel that can be used by the AlgoLoaderCostZones. + */ +template <typename _TreeLoader> +class KernelLoaderFChebSym : public FKernelLoader<_TreeLoader> { + +public: + // Required type definitions + using TreeLoader = _TreeLoader; + using FReal = typename TreeLoader::FReal; + /// Must derive from FChebCell + using CellClass = typename TreeLoader::CellClass; + using ContainerClass = typename TreeLoader::ContainerClass; + using OctreeClass = typename TreeLoader::OctreeClass; + + using MatrixKernelClass = FInterpMatrixKernelR<FReal>; + using KernelClass = FChebSymKernel <FReal, CellClass, ContainerClass, + MatrixKernelClass, TreeLoader::ORDER>; + /// Kernel class used to compute the tree cell costs. 
+ using CostKernelClass = FChebSymCostKernel<FReal, CellClass, ContainerClass, + MatrixKernelClass, TreeLoader::ORDER, + OctreeClass>; + + // Meaningful (?) error message. + static_assert(std::is_base_of<FChebCell<FReal,TreeLoader::ORDER>,CellClass>::value, + "TreeLoader::CellClass must derive from FChebCell"); + + + const FReal epsilon = 1e-4; + const MatrixKernelClass _matrixKernel; + /// kernel used to compute the tree cells interactions. + KernelClass _kernel; + /// Kernel used to compute the tree cells costs. + CostKernelClass _costKernel; + + /// Builds the loader and loads the kernel. + KernelLoaderFChebSym(FPerfTestParams& /*params*/, TreeLoader& treeLoader) : + _matrixKernel(), + _kernel(treeLoader._tree.getHeight(), + treeLoader._tree.getBoxWidth(), + treeLoader._tree.getBoxCenter(), + &_matrixKernel), + _costKernel(&(treeLoader._tree), epsilon){ + + } + + +}; + + +#endif diff --git a/Tests/noDist/PerfTest.cpp b/Tests/noDist/PerfTest.cpp new file mode 100644 index 0000000000000000000000000000000000000000..25ddbf9a944efe17f6a93fdb1516f171713b0ea7 --- /dev/null +++ b/Tests/noDist/PerfTest.cpp @@ -0,0 +1,90 @@ + +/** + * \file + * \author Quentin Khan + * + * This program is used to run different performance tests for the various + * algorithms that have been implemented for ScalFMM. + * + * See the PerfTestUtils.hpp file classes for some more in depth information. Run + * with argument --help for usage information. + */ + + +#include <iostream> +#include <string> + +#include "Utils/FParameters.hpp" +#include "Utils/FParameterNames.hpp" + +#include "PerfTestUtils.hpp" + +#include "TreeLoaderFCheb.hpp" + +#include "KernelLoaderFChebSym.hpp" + +#include "AlgoLoaderThread.hpp" +#include "AlgoLoaderTask.hpp" +#include "AlgoLoaderCostZones.hpp" + +/** + * \brief Runs a generic sequence of actions to use an algorithm. + * + * This function runs the basic steps that are needed to run an FMM algorithm + * over a set of particles. 
It does the following steps : + * + * - Load a tree using the class defined as a TreeLoader + * - Prepares the needed kernels using the KernelLoader + * - Prepares and runs the algorithm using the AlgorithmLoader + * + * See documentation of FTreeLoader, FKernelLoader, FAlgoLoader. + */ +template <class TreeLoader, + template <typename TL> class KernelLoader, + template <typename TL, template <typename TL> class KL> class AlgoLoader> +void runperf(FPerfTestParams& params) +{ + TreeLoader treeLoader(params); + KernelLoader<TreeLoader> kernelLoader(params, treeLoader); + AlgoLoader<TreeLoader, KernelLoader> algoLoader(params, treeLoader, kernelLoader); + algoLoader.run(); +} + +int main (int argc, char** argv) +{ + FHelpDescribeAndExit(argc, argv, + "Driver for Chebyshev interpolation kernel (1/r kernel).", + FParameterDefinitions::InputFile, + FParameterDefinitions::OctreeHeight, + FParameterDefinitions::OctreeSubHeight, + FParameterDefinitions::NbThreads, + {{"--algo"},"Algorithm to run (costzones, basic, task)"}, + {{"--schedule"},"OpenMP scheduling policy (static, dynamic)."}); + FPerfTestParams params; + { + using namespace FParameterDefinitions; + using namespace FParameters; + params.filename = getStr(argc,argv,InputFile.options, + "../Data/unitCubeXYZQ100.bfma"); + params.treeHeight = getValue(argc, argv, OctreeHeight.options, 5); + params.subTreeHeight = getValue(argc, argv, OctreeSubHeight.options, 2); + params.nbThreads = getValue(argc, argv, NbThreads.options, 1); + params.algo = getStr(argc,argv,{"--algo"},"task"); + params.omp_static_schedule = + getStr(argc,argv,{"--schedule"},"dynamic") == std::string("static"); + } + + omp_set_num_threads(params.nbThreads); + + if( "basic" == params.algo ) { + runperf<TreeLoaderFCheb<>, KernelLoaderFChebSym, AlgoLoaderThread>(params); + } else if( "task" == params.algo ) { + runperf<TreeLoaderFCheb<>, KernelLoaderFChebSym, AlgoLoaderTask>(params); + } else if ( "costzones" == params.algo ) { + 
runperf<TreeLoaderFCheb<>, KernelLoaderFChebSym, AlgoLoaderCostZones>(params); + } else { + std::cout << "Unknown algorithm: " << params.algo << std::endl; + } + + +} diff --git a/Tests/noDist/PerfTestUtils.hpp b/Tests/noDist/PerfTestUtils.hpp new file mode 100644 index 0000000000000000000000000000000000000000..6c300207d3a8dbfb8da57b7fd7ca316ffe95c6a3 --- /dev/null +++ b/Tests/noDist/PerfTestUtils.hpp @@ -0,0 +1,176 @@ +#ifndef _PERFTESTUTILS_HPP_ +#define _PERFTESTUTILS_HPP_ + +#include <string> + +#include "Utils/FTic.hpp" +#include "Files/FFmaGenericLoader.hpp" + +#include "Containers/FOctree.hpp" + +/** + * \brief Store the PerfTest program parameters. + */ +struct FPerfTestParams { + int subTreeHeight = 2; ///< Subtree height. + int treeHeight = 5; ///< Tree height. + int nbThreads = 1; ///< Maximum number of threads (when used). + std::string filename = ""; ///< Particles file. + std::string algo = "task"; ///< Algorithm to run. + bool omp_static_schedule = false; ///< OpenMP static or dynamic schedule. +}; + + +/** + * \brief Base class for tree loaders. + * + * This class itself does not provide anything but a base on which to build tree + * loaders. A tree loader should satisfy the following rules. + * + * - Define the public typedefs : CellClass, ContainerClass, LeafClass, + * OctreeClass. + * - Provide public acces to a member of type OctreeClass _tree as the tree + * that is loaded. + * - Tree loading must happen at construction. + * - It may provide any other members or typdefs required by a special + * FKernelLoader or FAlgoLoader. + * + * For convenience, this class provides a timer and a basic loadTree method that + * should be enough to load a tree from and FMA file. + * + * \note It is not mandatory that a loader inherit from this class. It must + * however follow the aforementioned rules. + */ +class FTreeLoader { +public: + /// A timer + /** Is used to time the loadTree method. 
+ */ + FTic time; +protected: + + /** + * \brief A template which type is always false. + * + * This template is only expanded by the compiler when it is requested + * (ie. the compiler will not try to optimize out its value.). Must be used + * to create false static_assert to catch unintended use of a template. + */ + template <typename... Args> + struct false_type { + bool value = false; + }; + + /** + * \brief Failure method for unimplemented loadTree templates. + * + * This template will catch unspecialised call to the loadTree method and + * will cause the compilation to fail with a (somewhat) meaningfull message. + */ + template <typename...Args> + void loadTree(Args...) { + static_assert(false_type<Args...>::value, + "I don't know how to load this tree with this loader..."); + } + + + /** + * \brief Simple method to load a tree from a FMA file. + * + * The template parameters are usualy guessed by the compiler. + * + * \tparam OctreeClass The class of the tree to fill. + * \tparam FReal The floating point type. + * + * \param loader The file loader to read from the file. + * \param tree The tree to be filled. + */ + template <class OctreeClass, typename FReal> + void loadTree(FFmaGenericLoader<FReal>& loader, OctreeClass& tree) { + std::cout << "Creating & inserting particles" << std::flush; + + time.tic(); + + FPoint<FReal> position; + FReal physicalValue = 0.0; + for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart) { + // Read particle per particle from file + loader.fillParticle(&position,&physicalValue); + // put particle in octree + tree.insert(position, idxPart, physicalValue); + } + + time.tac(); + std::cout << " Done (" << time.elapsed() << "s)." << std::endl; + } + +}; + +/** + * \brief Base class for kernel loaders. + * + * This class itself does not provide anything but a base on which to build + * kernel loaders. A kernel loader should satisfy the following rules. 
+ * + * - Define the public typedefs : TreeLoader, KernelClass. + * - Provide public acces to a member of type Kernelclass _kernel as the + * kernel that is loaded. + * - Kernel loading must happen at construction. + * - It may provide any other members or typdefs required by a special + * FAlgoLoader. + * + * For convenience, this class provides a timer. + * + * \tparam _TreeLoader The tree loader that was used. + * + * \note It is not mandatory that a loader inherit from this class. It must + * however follow the aforementioned rules. + */ +template<class _TreeLoader> +class FKernelLoader { + /// The tree loader that was used (see FTreeLoader). + using TreeLoader = _TreeLoader; +public: + FTic time; +}; + +/** + * \brief Base class for algorithm loaders. + * + * This class itself does not provide anything but a base on which to build + * algorithm loaders. A kernel loader should satisfy the following rules. + * + * - Define the public typedefs : TreeLoader, KernelLoader. + * - Provide public acces to a member of type + * \link TreeLoader Treeloader::OctreeClass* \endlink` _algo` + * as the algorithm that is loaded. This pointer should be valid from the + * end of the ::run method to the destruction of the loader. + * - It may provide any other members or typdefs. + * + * For convenience, this class provides a timer. + * + * \tparam _TreeLoader The tree loader that was used. + * \tparam _KernelLoader The kernel loader *template* that was used, the + * KernelLoader type will then be _KernelLoader<_TreeLoader>. + * + * \note It is not mandatory that a loader inherit from this class. It must + * however follow the aforementioned rules. + */ +template <class _TreeLoader, template<typename> class _KernelLoader> +class FAlgoLoader { + /// The tree loader that was used (see FTreeLoader). + using TreeLoader = _TreeLoader; + /// The kernel loader that was used (see FKernelLoader). + using KernelLoader = _KernelLoader<TreeLoader>; +public: + /// A timer. 
+ FTic time; + /// Method that runs the algorithm. + virtual void run() = 0; +}; + + + + + +#endif diff --git a/Tests/noDist/TreeLoaderFCheb.hpp b/Tests/noDist/TreeLoaderFCheb.hpp new file mode 100644 index 0000000000000000000000000000000000000000..6b1a8a9011251057673ee5dd55288fc0dca1d5bb --- /dev/null +++ b/Tests/noDist/TreeLoaderFCheb.hpp @@ -0,0 +1,48 @@ +#ifndef _TREELOADERFCHEB_HPP_ +#define _TREELOADERFCHEB_HPP_ + +#include "PerfTestUtils.hpp" + +#include "Kernels/Chebyshev/FChebCell.hpp" +#include "Containers/FOctree.hpp" +#include "Components/FSimpleLeaf.hpp" +#include "Kernels/P2P/FP2PParticleContainerIndexed.hpp" + +#include "BalanceTree/FCostCell.hpp" + +/** + * \brief Tree loader for a Chebyshev cell type tree. + * + * See FTreeLoader documentation. + */ +template <typename _FReal = double> +class TreeLoaderFCheb : public FTreeLoader { +public: + using FReal = _FReal; + enum {ORDER = 7}; ///< Chebyshev interpolation order. + + // Required type definitions. + using CellClass = FCostCell<FChebCell<FReal, ORDER>>; + using ContainerClass = FP2PParticleContainerIndexed<FReal>; + using LeafClass = FSimpleLeaf<FReal, ContainerClass >; + using OctreeClass = FOctree<FReal, CellClass, ContainerClass, LeafClass>; + + /// File loader. + FFmaGenericLoader<FReal> _loader; + /// Required tree member. + OctreeClass _tree; + + /// Constructs the loader and loads the tree. 
+ TreeLoaderFCheb(FPerfTestParams& params): + _loader(params.filename), + _tree(params.treeHeight, + params.subTreeHeight, + _loader.getBoxWidth(), + _loader.getCenterOfBox()) { + this->loadTree(_loader, _tree); + } + + +}; + +#endif diff --git a/Tests/noDist/testBlockedAlgorithmDyn.cpp b/Tests/noDist/testBlockedAlgorithmDyn.cpp index 010cbce5f7e2d62feb1741e4c7659a5e842a73d9..9fd0f7813e973178e2804c084f85260d0414f53b 100644 --- a/Tests/noDist/testBlockedAlgorithmDyn.cpp +++ b/Tests/noDist/testBlockedAlgorithmDyn.cpp @@ -110,10 +110,6 @@ public: : symb(nullptr), up(nullptr), down(nullptr){ } - void init(){ - resetToInitialState(); - } - void release(){ // nothing } @@ -265,13 +261,33 @@ int main(int argc, char* argv[]){ // size_t* inSymbSize, size_t* inDownSize) { // GroupContainerClass::GetSizeContainerFunc<ContainerClass>( // inIndex, inParticles, inSymbSize, inDownSize); -// }); +// }, +// [](const MortonIndex /*mindex*/, +// unsigned char* symbBuff, const size_t /*symbSize*/, +// unsigned char* upBuff, const size_t /*upSize*/, +// unsigned char* downBuff, const size_t /*downSize*/, + // const int /*inLevel*/){ +// GroupCellClass cell(symbBuff, upBuff, downBuff); +// }); GroupOctreeClass groupedTree(NbLevels, loader.getBoxWidth(), loader.getCenterOfBox(), groupSize, cellSymbSizePerLevel.get(), cellUpSizePerLevel.get(), cellDownSizePerLevel.get(), allParticles.get(), loader.getNumberOfParticles(), [](const MortonIndex inIndex, const UnknownDescriptor<FReal> inParticles[], const FSize inNbParticles, size_t* inSymbSize, size_t* inDownSize){ GroupContainerClass::GetSizeFunc(inIndex, inParticles, inNbParticles, inSymbSize, inDownSize); + }, + [](const MortonIndex inIndex, const UnknownDescriptor<FReal> inParticles[], + const FSize inNbParticles, unsigned char* symbBuffer, const size_t inSymbSize, + unsigned char* downBuffer, const size_t inDownSize){ + GroupContainerClass leaf(symbBuffer, downBuffer); + leaf.init(inIndex, inParticles, inNbParticles, inSymbSize, 
inDownSize); + }, + [](const MortonIndex /*mindex*/, + unsigned char* symbBuff, const size_t /*symbSize*/, + unsigned char* upBuff, const size_t /*upSize*/, + unsigned char* downBuff, const size_t /*downSize*/, + const int /*inLevel*/){ + GroupCellClass cell(symbBuff, upBuff, downBuff); }); // GroupOctreeClass groupedTree(NbLevels, loader.getBoxWidth(), loader.getCenterOfBox(), groupSize, // cellSymbSizePerLevel.get(), cellUpSizePerLevel.get(), cellDownSizePerLevel.get(), @@ -280,6 +296,19 @@ int main(int argc, char* argv[]){ // const FSize inNbParticles, size_t* inSymbSize, size_t* inDownSize){ // GroupContainerClass::GetSizeFunc(inIndex, inParticles, inNbParticles, inSymbSize, inDownSize); // }, +// [](const MortonIndex inIndex, const UnknownDescriptor<FReal> inParticles[], +// const FSize inNbParticles, unsigned char* symbBuffer, const size_t inSymbSize, +// unsigned char* downBuffer, const size_t inDownSize){ +// GroupContainerClass leaf(symbBuffer, downBuffer); +// leaf.init(inIndex, inParticles, inNbParticles, inSymbSize, inDownSize); +// }, +// [](const MortonIndex /*mindex*/, +// unsigned char* symbBuff, const size_t /*symbSize*/, +// unsigned char* upBuff, const size_t /*upSize*/, +// unsigned char* downBuff, const size_t /*downSize*/, + // const int /*inLevel*/){ +// GroupCellClass cell(symbBuff, upBuff, downBuff); +// } // false, true); // GroupOctreeClass groupedTree(NbLevels, loader.getBoxWidth(), loader.getCenterOfBox(), groupSize, // cellSymbSizePerLevel.get(), cellUpSizePerLevel.get(), cellDownSizePerLevel.get(), @@ -288,6 +317,19 @@ int main(int argc, char* argv[]){ // const FSize inNbParticles, size_t* inSymbSize, size_t* inDownSize){ // GroupContainerClass::GetSizeFunc(inIndex, inParticles, inNbParticles, inSymbSize, inDownSize); // }, +// [](const MortonIndex inIndex, const UnknownDescriptor<FReal> inParticles[], +// const FSize inNbParticles, unsigned char* symbBuffer, const size_t inSymbSize, +// unsigned char* downBuffer, const size_t 
inDownSize){ +// GroupContainerClass leaf(symbBuffer, downBuffer); +// leaf.init(inIndex, inParticles, inNbParticles, inSymbSize, inDownSize); +// }, +// [](const MortonIndex /*mindex*/, +// unsigned char* symbBuff, const size_t /*symbSize*/, +// unsigned char* upBuff, const size_t /*upSize*/, +// unsigned char* downBuff, const size_t /*downSize*/, +// const int /*inLevel*/){ +// GroupCellClass cell(symbBuff, upBuff, downBuff); +// } // false, true, 0.2); groupedTree.printInfoBlocks(); diff --git a/Tests/noDist/testFmmAlgorithmBalanced.cpp b/Tests/noDist/testFmmAlgorithmBalanced.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a59bdaab005ea9f4cabdf122a28af928c7f5f2dd --- /dev/null +++ b/Tests/noDist/testFmmAlgorithmBalanced.cpp @@ -0,0 +1,134 @@ +// =================================================================================== +// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Bérenger Bramas, Matthias Messner +// olivier.coulaud@inria.fr, berenger.bramas@inria.fr +// This software is a computer program whose purpose is to compute the FMM. +// +// This software is governed by the CeCILL-C and LGPL licenses and +// abiding by the rules of distribution of free software. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public and CeCILL-C Licenses for more details. +// "http://www.cecill.info". +// "http://www.gnu.org/licenses". 
+// =================================================================================== + +// ==== CMAKE ==== +// Keep in private GIT +// @SCALFMM_PRIVATE + + +#include <string> + +#include "Files/FFmaGenericLoader.hpp" +#include "Files/FRandomLoader.hpp" +#include "Containers/FOctree.hpp" + +// Cell +//#include "Components/FBasicCell.hpp" +//#include "Kernels/Chebyshev/FChebCell.hpp" +#include "Components/FTestCell.hpp" + +// Particle Container +//#include "Components/FBasicParticleContainer.hpp" +#include "Components/FTestParticleContainer.hpp" + +// Leaf +#include "Components/FSimpleLeaf.hpp" + +// Kernel +//#include "Kernels/Chebyshev/FChebSymKernel.hpp" +#include "Components/FTestKernels.hpp" +#include "BalanceTree/FChebSymCostKernel.hpp" + +// Algorithm +#include "Core/FFmmAlgorithm.hpp" +#include "BalanceTree/FFmmAlgorithmThreadBalanced.hpp" + +// Other +#include "BalanceTree/FCostZones.hpp" +#include "testFmmAlgorithmBalancedArgs.hpp" +#include "testFmmAlgorithmBalancedUtils.hpp" // last include, to shorten main file + +#define ORDER 7 + +using FReal = double; + +using CellClass = FCostCell<FTestCell>; +using ContainerClass = FTestParticleContainer<FReal>; +using LeafClass = FSimpleLeaf< FReal, ContainerClass >; +using OctreeClass = FOctree< FReal, CellClass, ContainerClass, LeafClass >; + +using MatrixKernelClass = FInterpMatrixKernelR<FReal>; +using BalanceKernelClass= FChebSymCostKernel<FReal, CellClass, ContainerClass, + MatrixKernelClass, ORDER, + OctreeClass>; + +using KernelClass = FTestKernels< CellClass, ContainerClass>; + +template < template <typename...> class T, class KernelClassT> +using FmmClass = T <OctreeClass, CellClass, ContainerClass, KernelClassT, LeafClass >; + + +const FReal epsilon = 1e-4; + + +int main(int argc, char** argv) +{ + // Handle arguments + loadFMAAndRunFMMArgs args(argc, argv); + + + /* Creating tree and insterting particles *********************************/ + FFmaGenericLoader<FReal> loader(args.inFileName().c_str()); 
+ //FRandomLoader loader(20, 1, FPoint(0.5,0.5,0.5), 1); + OctreeClass tree(args.treeHeight(), + args.subTreeHeight(), + loader.getBoxWidth(), + loader.getCenterOfBox()); + + loadTree(tree, loader); + /**************************************************************************/ + + + /* Compute the cost of each tree cell *************************************/ + BalanceKernelClass balanceKernel(&tree, epsilon); + FmmClass<FFmmAlgorithm, BalanceKernelClass> costAlgo(&tree, &balanceKernel); + + costAlgo.execute(); + + if (args.verboseLevel() > 1) { + balanceKernel.printResults(std::cout); + } + /**************************************************************************/ + + std::cerr << ("Running the costzones algorithm") << std::endl; + /* Run the costzone algorithm *********************************************/ + FCostZones<OctreeClass, CellClass> costzones(&tree, args.zoneCount()); + costzones.run(); + + writeZones(args, costzones); + /**************************************************************************/ + std::cerr << ("Done") << std::endl; + + + /* Run the balanced algorithm *********************************************/ + + std::cout << "Running kernel" << std::endl; + KernelClass computeKernel; + FmmClass<FFmmAlgorithmThreadBalanced, KernelClass> fmmAlgo(&tree, &computeKernel, costzones.getZoneBounds(), costzones.getLeafZoneBounds()); + //FmmClass<FFmmAlgorithm, KernelClass> fmmAlgo(&tree, &computeKernel); + + fmmAlgo.execute(); + /**************************************************************************/ + + + /* Check the results ******************************************************/ + ValidateFMMAlgo<OctreeClass, CellClass, ContainerClass, LeafClass>(&tree); + + + return EXIT_SUCCESS; +} + + diff --git a/Tests/noDist/testFmmAlgorithmBalancedArgs.hpp b/Tests/noDist/testFmmAlgorithmBalancedArgs.hpp new file mode 100644 index 0000000000000000000000000000000000000000..8f1a3e75d5143223ed6f54f5ba476a5413a79902 --- /dev/null +++ 
b/Tests/noDist/testFmmAlgorithmBalancedArgs.hpp @@ -0,0 +1,122 @@ +// ==== CMAKE ==== +// Keep in private GIT +// @SCALFMM_PRIVATE + +#ifndef _LOADFMAANDRUNFMMARGS_HPP_ +#define _LOADFMAANDRUNFMMARGS_HPP_ + +#include <string> + +#include "Utils/FParameters.hpp" +#include "Utils/FParameterNames.hpp" + + +class loadFMAAndRunFMMArgs { + const int _treeHeightInit = 5; + const int _subTreeHeightInit = 1; + const int _zoneCountInit = 4; + const int _verboseInit = 0; + const char* _inFileNameInit = ""; + const char* _outFileNameInit = "balancetest"; + const char* _outFileNameExtInit = "csv"; + + int _argc; + char** _argv; + + const FParameterNames OutputFileBasename = + {{"--output-file-basename", "-fout-base"}, + "Output files' basename. One file is created for each level in " + "the tree. Each file has a level-in-tree based extension."}; + + const FParameterNames OutputFileExtension = + {{"--output-file-extention", "-fout-ext"}, + ("Output files extension. One file is created for each level in the" + " tree. Each file has a 'basename_$nbZones$z.$i$.extension' " + "extension where $i is the level. 
Default value is " + + std::string(_outFileNameExtInit) + ".").c_str()}; + + const FParameterNames ZoneCount = + {{"--zone-count","-z"},"Number of zones to create."}; + +public: + int treeHeight() const { + using namespace FParameterDefinitions; + using namespace FParameters; + + return getValue(_argc, _argv, OctreeHeight.options, _treeHeightInit); + } + + int subTreeHeight() const { + using namespace FParameterDefinitions; + using namespace FParameters; + + return getValue(_argc, _argv, OctreeSubHeight.options, + _subTreeHeightInit); + } + + int zoneCount() const { + using namespace FParameterDefinitions; + using namespace FParameters; + + return getValue(_argc, _argv, ZoneCount.options, _zoneCountInit); + } + + int verboseLevel() const { + using namespace FParameterDefinitions; + using namespace FParameters; + + return getValue(_argc, _argv, EnabledVerbose.options, _verboseInit); + } + + std::string inFileName() const { + using namespace FParameterDefinitions; + using namespace FParameters; + + return getStr(_argc, _argv, InputFile.options, _inFileNameInit); + } + + std::string outFileName() const { + using namespace FParameterDefinitions; + using namespace FParameters; + + return getStr(_argc, _argv, OutputFileBasename.options, _outFileNameInit); + } + + std::string outFileExt() const { + using namespace FParameterDefinitions; + using namespace FParameters; + + std::string ext = getStr(_argc, _argv, OutputFileExtension.options, + _outFileNameExtInit); + if ( ext.at(0) != '.' ) + return '.' 
+ ext; + return ext; + } + + loadFMAAndRunFMMArgs(int argc, char** argv) : _argc(argc), _argv(argv) { + parse(); + } + + int parse() { + using namespace FParameterDefinitions; + using namespace FParameters; + + FHelpDescribeAndExit + (_argc, _argv, + "Loads an FMA file into a tree and runs a pseudo FMM algorithm " + "through it to compute load balancing.", + OctreeHeight, + OctreeSubHeight, + InputFile, + OutputFileBasename, + OutputFileExtension, + ZoneCount, + EnabledVerbose + ); + return 0; + } + +}; + + +#endif diff --git a/Tests/noDist/testFmmAlgorithmBalancedUtils.hpp b/Tests/noDist/testFmmAlgorithmBalancedUtils.hpp new file mode 100644 index 0000000000000000000000000000000000000000..2996bf8ed516d8fb544c6dc3b2fef900f6ec0aa6 --- /dev/null +++ b/Tests/noDist/testFmmAlgorithmBalancedUtils.hpp @@ -0,0 +1,103 @@ +// ==== CMAKE ==== +// Keep in private GIT +// @SCALFMM_PRIVATE + +#ifndef _LOADFMAANDRUNFMMUTILS_HPP_ +#define _LOADFMAANDRUNFMMUTILS_HPP_ + +#include <fstream> +#include <memory> +#include <assert.h> + +/** + * \brief Saves the costzones to files. + * + * One file is created per level, one particle is stored per line in the form : + * x,y,z,zone. + * + * \param args The args that were given to the program + * \param costzones The CostZones object that was used get the tree balance. + */ +template<class OctreeClass, class CellClass> +void writeZones(const loadFMAAndRunFMMArgs& args, const FCostZones <OctreeClass,CellClass>& costzones) +{ + const std::string outFileBaseName = args.outFileName(); + const std::string outFileExt = args.outFileExt(); + const int verboseLevel = args.verboseLevel(); + const int treeHeight = args.treeHeight(); + + auto zones = costzones.getZones(); + long unsigned int zoneCount = zones.size();//args.zoneCount(); + + std::cout << "Writing " << zoneCount << " zones." << std::endl; + + // GCC versions before 5.0 have not implemented move constructors to streams + // we use unique pointers to get around this problem. 
+ std::vector<std::unique_ptr<std::ofstream>> outfiles; + for ( int levelIdx = 0; levelIdx < treeHeight; levelIdx++ ) { + std::unique_ptr<std::ofstream> out( + new std::ofstream( outFileBaseName + + "_" + std::to_string(zoneCount) + "z" + + "." + std::to_string(levelIdx) + + outFileExt)); + *out << "x,y,z,zone" << std::endl; + outfiles.push_back(std::move(out)); + } + + int zoneIdx = 0; + for ( auto zone : zones) { + for ( auto cell : zone) { + *(outfiles[cell.first]) << cell.second->getCoordinate().getX() << ","; + *(outfiles[cell.first]) << cell.second->getCoordinate().getY() << ","; + *(outfiles[cell.first]) << cell.second->getCoordinate().getZ() << ","; + *(outfiles[cell.first]) << zoneIdx << std::endl; + } + zoneIdx++; + } + + if ( verboseLevel > 0) { + auto& zonebounds = costzones.getZoneBounds(); + zoneIdx = 0; + for ( auto zone : zonebounds ) { + std::cout << std::endl << "Zone " << zoneIdx << std::endl; + int level = 0; + for ( auto levelbounds : zone ) { + std::cout << "Level" << level << " : [" << levelbounds.first << ":" << levelbounds.second << "]\n"; + level++; + } + zoneIdx++; + } + } +} + + +/** + * \brief Loads a tree from a loader. + * \param tree The the to load into. + * \param loader The loader to load from. 
+ */ +template <typename FReal, class OctreeClass> +void loadTree(OctreeClass& tree, FFmaGenericLoader<FReal>& loader) +{ + FReal physicalValue; + FPoint<FReal> particlePosition; + // insertion + for ( int idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart ) { + loader.fillParticle(&particlePosition, &physicalValue); + tree.insert(particlePosition); + } +} + + +template <typename FReal, class OctreeClass> +void loadTree(OctreeClass& tree, FRandomLoader<FReal>& loader) +{ + FPoint<FReal> particlePosition; + // insertion + for ( int idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart ) { + loader.fillParticle(&particlePosition); + tree.insert(particlePosition); + } +} + +#endif diff --git a/UTests/utestBoolArray.cpp b/UTests/utestBoolArray.cpp index abf7873ae1119698ee1bab26001ea012ae42b170..7f830df2218e6177dcc46756ff9e19cbc250bd11 100644 --- a/UTests/utestBoolArray.cpp +++ b/UTests/utestBoolArray.cpp @@ -26,66 +26,70 @@ /** this class test the bool array container */ class TestArray : public FUTester<TestArray> { - void TestGetSet(){ - FBoolArray array(500); - for(int idx = 0 ; idx < 500 ; ++idx){ - uassert(!array.get(idx)); - } - - for(int idx = 0 ; idx < 500 ; ++idx){ - array.set(idx, true); - uassert(array.get(idx)); - array.set(idx, false); - uassert(!array.get(idx)); - } - - for(int idx = 0 ; idx < 500 ; ++idx){ - array.set(idx, true); - } - array.setToZeros(); - for(int idx = 0 ; idx < 500 ; ++idx){ - uassert(!array.get(idx)); - } - } - - void TestGetSet2(){ - FBoolArray array(100); - - for(int idx = 0 ; idx < 100 ; ++idx){ - if(idx%3){ - array.set(idx, true); - uassert(array.get(idx)); - } - else{ - uassert(!array.get(idx)); - } - } + void TestGetSet(){ + FBoolArray array(500); + for(int idx = 0 ; idx < 500 ; ++idx){ + uassert(!array.get(idx)); } - void TestEqual(){ - FBoolArray array1(10); - FBoolArray array2(10); + for(int idx = 0 ; idx < 500 ; ++idx){ + array.set(idx, true); + uassert(array.get(idx)); + array.set(idx, false); + 
uassert(!array.get(idx)); + } + for(int idx = 0 ; idx < 500 ; ++idx){ + array.set(idx, true); + } + array.setToZeros(); + for(int idx = 0 ; idx < 500 ; ++idx){ + uassert(!array.get(idx)); + } + array.setToOnes(); + for(int idx = 0 ; idx < 500 ; ++idx){ + uassert(array.get(idx)); + } + } + + void TestGetSet2(){ + FBoolArray array(100); + + for(int idx = 0 ; idx < 100 ; ++idx){ + if(idx%3){ + array.set(idx, true); + uassert(array.get(idx)); + } + else{ + uassert(!array.get(idx)); + } + } + } - uassert(array1 == array2); + void TestEqual(){ + FBoolArray array1(10); + FBoolArray array2(10); - array1.set(1, true); - uassert(array1 != array2); - array2.set(1, true); - uassert(array1 == array2); + uassert(array1 == array2); - array1.set(5, true); - array2 = array1; - uassert(array1 == array2); - } - - // set test - void SetTests(){ - AddTest(&TestArray::TestGetSet,"Test Get & Set"); - AddTest(&TestArray::TestGetSet2,"Test Get & Set 2"); - AddTest(&TestArray::TestEqual,"Test Equal"); - } + array1.set(1, true); + uassert(array1 != array2); + + array2.set(1, true); + uassert(array1 == array2); + + array1.set(5, true); + array2 = array1; + uassert(array1 == array2); + } + + // set test + void SetTests(){ + AddTest(&TestArray::TestGetSet,"Test Get & Set"); + AddTest(&TestArray::TestGetSet2,"Test Get & Set 2"); + AddTest(&TestArray::TestEqual,"Test Equal"); + } }; // You must do this diff --git a/paraviewscript.py b/paraviewscript.py new file mode 100644 index 0000000000000000000000000000000000000000..c81e485645d324c33887dc198c42edd0cc94b6f9 --- /dev/null +++ b/paraviewscript.py @@ -0,0 +1,26 @@ +from paraview import * + + +reader = GetActiveSource() +# SVReader(DetectNumericColumns = True, FieldDelimiterCharacters = ",", HaveHeaders = True, FileName = "/home/qkhan/work/scalfmm/Build/20k10z.4.csv") + +#classRef = CSVReader() + +#if not isinstance(reader, classRef.__class__): +# exit(-1) + +filename = reader.FileName[0] +print filename +nbZones = 
int(filename.split('.')[0].split('_')[-1][0:-1]) + +selection = SelectionQuerySource(FieldType = "ROW", QueryString = "zone >= 0") +extractor = ExtractSelection(Input = reader, Selection = selection) +points = TableToPoints(Input = extractor, XColumn = "x", YColumn = "y", ZColumn = "z") + +repr = GetRepresentation() +repr.ColorArrayName = 'zone' +repr.LookupTable = AssignLookupTable(points.PointData['zone'], "Cool to Warm") +Show() + +#for i in range(nbZones): +