Commit 8c822839 authored by Florent Pruvost
parents c509e9d7 3f5897af
......@@ -92,7 +92,8 @@ typedef enum scalfmm_algorithm_config {
multi_thread = 1, /* Use the Multi thread version of Scalfmm*/
periodic = 2, /* Use the periodic version of Scalfmm*/
source_target = 3, /* Use the source/target algorithm */
adaptiv = 4 /*Temporary*/
adaptiv = 4, /*Temporary*/
mpi = 5
} scalfmm_algorithm;
......@@ -135,12 +136,36 @@ scalfmm_handle scalfmm_init( scalfmm_kernel_type KernelType,scalfmm_algorithm al
typedef void* (*Callback_init_cell)(int level, long long morton_index, int* tree_position, double* spatial_position, void * inDatas);
/**
 * @brief Function to destroy what has been initialized by the user
 * (should be passed to Scalfmm_dealloc_handle).
 */
typedef void (*Callback_free_cell)(void*);
/**
 * @brief Callback used to know the size of the user's cell data.
 * @param level current level of the current cell
 * @param userDatas data that will be serialized
 * @param morton_index Morton index of the current cell
*/
typedef FSize (*Callback_get_cell_size)(int level, void * userDatas, long long morton_index);
/**
 * @brief Callback used to serialize the user data inside an array of
 * the size given by Callback_get_cell_size.
 * @param userDatas data that will be serialized
 * @param size size of the allocated buffer
 * @param memoryAllocated buffer to write the serialized data into
*/
typedef void (*Callback_copy_cell)(void * userDatas, FSize size, void * memoryAllocated);
/**
 * @brief Callback used to re-initialize the user data from what has
 * been stored inside an array by Callback_copy_cell.
*/
typedef void * (*Callback_restore_cell)(int level, void * arrayTobeRead);
/**
 * @brief Structure containing the user's callbacks used to
 * initialize/free the cell's user data.
......@@ -148,6 +173,9 @@ typedef void (*Callback_free_cell)(void*);
typedef struct User_Scalfmm_Cell_Descriptor{
Callback_free_cell user_free_cell;
Callback_init_cell user_init_cell;
Callback_get_cell_size user_get_size;
Callback_copy_cell user_copy_cell;
Callback_restore_cell user_restore_cell;
}Scalfmm_Cell_Descriptor;
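/**
 * A minimal sketch (not part of the API) of how the serialization
 * callbacks could be implemented for a hypothetical user cell holding
 * a fixed-size payload; `MyCell`, `my_get_size`, `my_copy_cell` and
 * `my_restore_cell` are illustrative names, and <string.h>/<stdlib.h>
 * are assumed to be included.
 * @code
 * typedef struct { double multipole[8]; double local[8]; } MyCell;
 *
 * FSize my_get_size(int level, void* userDatas, long long morton_index){
 *     return (FSize) sizeof(MyCell); // fixed footprint per cell
 * }
 * void my_copy_cell(void* userDatas, FSize size, void* memoryAllocated){
 *     memcpy(memoryAllocated, userDatas, (size_t) size); // serialize
 * }
 * void* my_restore_cell(int level, void* arrayTobeRead){
 *     MyCell* cell = (MyCell*) malloc(sizeof(MyCell));
 *     memcpy(cell, arrayTobeRead, sizeof(MyCell)); // deserialize
 *     return cell;
 * }
 *
 * // Registered alongside init/free in the descriptor:
 * // Scalfmm_Cell_Descriptor desc;
 * // desc.user_get_size     = my_get_size;
 * // desc.user_copy_cell    = my_copy_cell;
 * // desc.user_restore_cell = my_restore_cell;
 * @endcode
 */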
......@@ -448,7 +476,7 @@ typedef void (*Callback_M2L_Ext)(int level, void* targetCell, void* sourceCell,
 * @param neighborPosition array of relative positions of the neighbors
 * @param size number of existing neighbor cells (size of the arrays)
 * @param sourceCell array of cells to be read
 * @param userData data specific to the user's kernel
 */
typedef void (*Callback_M2LFull)(int level, void* targetCell, void* sourceCell[343], void* userData);
typedef void (*Callback_M2LFull)(int level, void* targetCell, const int * neighborPosition, const int size, void** sourceCell, void* userData);
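/**
 * A minimal sketch (hypothetical, not the library's code) of an M2L
 * callback using this interface: only existing neighbors are passed,
 * with sourceCell[i] located at relative position neighborPosition[i];
 * `my_m2l_once` is an illustrative helper.
 * @code
 * void my_M2LFull(int level, void* targetCell, const int* neighborPosition,
 *                 const int size, void** sourceCell, void* userData){
 *     for(int i = 0 ; i < size ; ++i){
 *         my_m2l_once(level, targetCell, sourceCell[i],
 *                     neighborPosition[i], userData);
 *     }
 * }
 * @endcode
 */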
/**
* @brief Function to be filled by user's L2L
......@@ -482,17 +510,20 @@ typedef void (*Callback_P2P)(FSize nbParticles, const FSize* particleIndexes, FS
/**
* @brief Function to be filled by user's P2P
 * @attention This function is symmetric, thus when a call is done
 * between the target and its neighbor cells, the user needs to apply
 * the target field onto the neighbor cells too.
 * @param nbParticles number of particles in the current leaf
 * @param particleIndexes indexes of the particles currently computed
 * @param sourceParticleIndexes array of indexes of the source particles
 * @param sourceNbPart array containing the number of particles in each neighbor
 * @param sourcePosition array containing the relative position of each neighbor
 * @param size size of the arrays (thus, number of existing neighbor cells)
 * @param userData data specific to the user's kernel
*/
typedef void (*Callback_P2PFull)(FSize nbParticles, const FSize* particleIndexes,
const FSize * sourceParticleIndexes[27],FSize sourceNbPart[27], void* userData);
const FSize ** sourceParticleIndexes,FSize * sourceNbPart,
const int * sourcePosition, const int size, void* userData);
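/**
 * A minimal sketch (hypothetical) of a symmetric P2P callback: the
 * interaction is accumulated on the target particles and, per the
 * @attention note above, on the source particles as well;
 * `my_pair_interact` is an illustrative helper that updates both sides.
 * @code
 * void my_P2PFull(FSize nbParticles, const FSize* particleIndexes,
 *                 const FSize** sourceParticleIndexes, FSize* sourceNbPart,
 *                 const int* sourcePosition, const int size, void* userData){
 *     for(int n = 0 ; n < size ; ++n){
 *         for(FSize i = 0 ; i < nbParticles ; ++i){
 *             for(FSize j = 0 ; j < sourceNbPart[n] ; ++j){
 *                 // updates both the target and the source particle
 *                 my_pair_interact(particleIndexes[i],
 *                                  sourceParticleIndexes[n][j], userData);
 *             }
 *         }
 *     }
 * }
 * @endcode
 */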
/**
......@@ -630,6 +661,10 @@ int scalfmm_get_nb_timers(scalfmm_handle handle);
*/
void scalfmm_get_timers(scalfmm_handle handle,double * Timers);
/////////////////////////////////////////////////////////////////////
/////////////// Algorithms functions /////////////////
/////////////////////////////////////////////////////////////////////
/**
 * @brief Set the upper limit in the tree for applying FMM : standard
......@@ -640,5 +675,61 @@ void scalfmm_get_timers(scalfmm_handle handle,double * Timers);
*/
void scalfmm_set_upper_limit(scalfmm_handle handle, int upperLimit);
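/**
 * Example (hypothetical handle; assuming upperLimit designates the
 * highest tree level on which the FMM operators are applied):
 * @code
 * scalfmm_set_upper_limit(handle, 2);
 * @endcode
 */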
/////////////////////////////////////////////////////////////////////
/////////////// Distributed Version //////////////////
/////////////////////////////////////////////////////////////////////
#ifdef SCALFMM_USE_MPI
#warning "IS_THAT_REALLY_WORKING"
//YES
/**
 * @brief Init the scalfmm library with MPI.
 * @param KernelType same as in scalfmm_init
 * @param algo same as in scalfmm_init
 * @param comm MPI communicator
*/
scalfmm_handle scalfmm_init_distributed( scalfmm_kernel_type KernelType,scalfmm_algorithm algo, const MPI_Comm comm);
/**
 * @brief These functions are to be called before the insert method.
 * @param handle scalfmm_handle provided by scalfmm_init_distributed.
 * @param nbPoints number of particles (if local, the number of
 * particles given to this process; if global, the total number of
 * particles)
 * @param particleXYZ array of positions; its size is nbPoints in both cases.
 * @param localArrayFilled array that will be filled with the particles
 * once the partitioning is done. It can be inserted with no changes.
 * @param indexesFilled array that stores the global index of each
 * particle in localArrayFilled.
 * @param outputNbPoint number of particles owned by this process after
 * the partitioning.
*/
void scalfmm_create_local_partition(scalfmm_handle handle, int nbPoints, double * particleXYZ, double ** localArrayFilled,
FSize ** indexesFilled, FSize * outputNbPoint);
void scalfmm_create_global_partition(scalfmm_handle handle, int nbPoints, double * particleXYZ, double ** localArrayFilled,
FSize ** indexesFilled, FSize * outputNbPoint);
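/**
 * A minimal sketch of the distributed setup (hypothetical variables,
 * error handling omitted): init with a communicator, partition the
 * local points, then insert the returned local array as usual. The
 * kernel type `myKernel` is assumed to be chosen elsewhere.
 * @code
 * scalfmm_handle h = scalfmm_init_distributed(myKernel, mpi, MPI_COMM_WORLD);
 * double* localXYZ  = NULL;
 * FSize*  globalIdx = NULL;
 * FSize   localNb   = 0;
 * scalfmm_create_local_partition(h, myNbPoints, myXYZ,
 *                                &localXYZ, &globalIdx, &localNb);
 * // ... insert the localNb particles from localXYZ ...
 * @endcode
 */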
/**
 * @brief Once the partitioning is done, one can call this function in
 * order to partition "things" following the same scheme. Note that the
 * arrays must be in the same order as the original particles.
 * @param handle scalfmm_handle provided by scalfmm_init_distributed.
 * @param nbThings number of items
 * @param sizeofthing size of ONE item
 * @param arrayOfThing array of items to be sorted/partitioned
 * @param newArray output array
*/
void scalfmm_generic_partition(scalfmm_handle handle, FSize nbThings, size_t sizeofthing, void * arrayOfThing, void ** newArray);
/**
 * @brief This function calls delete [] on its argument, in order to free
 * memory that was allocated inside scalfmm but given back to the user.
*/
void scalfmm_call_delete(void * array);
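/**
 * A minimal follow-up sketch (hypothetical names): redistribute one
 * double-valued attribute per particle with the same partition, then
 * release the arrays allocated inside scalfmm.
 * @code
 * double* localAttr = NULL;
 * scalfmm_generic_partition(h, myNbPoints, sizeof(double),
 *                           myAttr, (void**) &localAttr);
 * // ... run the algorithm ...
 * scalfmm_call_delete(localAttr);
 * scalfmm_call_delete(localXYZ);
 * @endcode
 */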
#endif
#endif
......@@ -832,6 +832,15 @@ public:
FAssertLF(0,"This feature is not available with Chebyshev Kernel, please use your own kernel or do not use it.\n Exiting anyways...\n");
}
virtual void create_local_partition(int nbPoints, double * particleXYZ, double ** localArrayFilled, FSize ** indexesFilled, FSize * outputNbPoint){
FAssertLF(0,"Either no MPI used or wrong initiation function called\n");
}
virtual void create_global_partition(int nbPoints, double * particleXYZ, double ** localArrayFilled, FSize ** indexesFilled, FSize * outputNbPoint){
FAssertLF(0,"Either no MPI used or wrong initiation function called\n");
}
virtual void generic_partition(FSize nbThings, size_t sizeofthing, void * arrayOfThing, void ** newArray){
FAssertLF(0,"Either no MPI used or wrong initiation function called\n");
}
};
template<class FReal>
......@@ -1057,4 +1066,21 @@ extern "C" void scalfmm_set_upper_limit(scalfmm_handle Handle, int upperLimit){
((ScalFmmCoreHandle<double> * ) Handle)->engine->set_upper_limit(upperLimit);
}
#ifdef SCALFMM_USE_MPI
extern "C" void scalfmm_create_local_partition(scalfmm_handle handle, int nbPoints, double * particleXYZ, double ** localArrayFilled,
FSize ** indexesFilled, FSize * outputNbPoint){
((ScalFmmCoreHandle<double> * ) handle)->engine->create_local_partition(nbPoints,particleXYZ,localArrayFilled,indexesFilled,outputNbPoint);
}
extern "C" void scalfmm_create_global_partition(scalfmm_handle handle, int nbPoints, double * particleXYZ, double ** localArrayFilled,
FSize ** indexesFilled, FSize * outputNbPoint){
((ScalFmmCoreHandle<double> * ) handle)->engine->create_global_partition(nbPoints,particleXYZ,localArrayFilled,indexesFilled,outputNbPoint);
}
extern "C" void scalfmm_generic_partition(scalfmm_handle handle, FSize nbThings, size_t sizeofthing, void * arrayOfThing, void ** newArray){
((ScalFmmCoreHandle<double> * ) handle)->engine->generic_partition(nbThings,sizeofthing,arrayOfThing,newArray);
}
#endif
#endif
......@@ -6,8 +6,9 @@ extern "C" {
#include "FInterEngine.hpp"
#include "FUserKernelEngine.hpp"
extern "C" scalfmm_handle scalfmm_init(/*int TreeHeight,double BoxWidth,double* BoxCenter, */scalfmm_kernel_type KernelType,
extern "C" scalfmm_handle scalfmm_init(/*int TreeHeight,double BoxWidth,
double* BoxCenter, */
scalfmm_kernel_type KernelType,
scalfmm_algorithm algo){
ScalFmmCoreHandle<double> * handle = new ScalFmmCoreHandle<double>();
typedef double FReal;
......@@ -19,7 +20,7 @@ extern "C" scalfmm_handle scalfmm_init(/*int TreeHeight,double BoxWidth,double*
typedef FP2PParticleContainerIndexed<FReal> ContainerClass;
typedef FTypedLeaf<FReal,ContainerClass> LeafClass;
handle->engine = new FUserKernelEngine<FReal,LeafClass>(/*TreeHeight, BoxWidth, BoxCenter, */KernelType);
handle->engine = new FUserKernelEngine<FReal,LeafClass>(/*TreeHeight, BoxWidth, BoxCenter, */KernelType,algo);
break;
case 1:
......@@ -61,7 +62,7 @@ extern "C" scalfmm_handle scalfmm_init(/*int TreeHeight,double BoxWidth,double*
typedef FP2PParticleContainerIndexed<FReal> ContainerClass;
typedef FSimpleLeaf<FReal,ContainerClass> LeafClass;
handle->engine = new FUserKernelEngine<FReal,LeafClass>(/*TreeHeight, BoxWidth, BoxCenter, */KernelType);
handle->engine = new FUserKernelEngine<FReal,LeafClass>(/*TreeHeight, BoxWidth, BoxCenter, */KernelType,algo);
break;
case 1:
......@@ -102,6 +103,33 @@ extern "C" void scalfmm_dealloc_handle(scalfmm_handle handle, Callback_free_cell
delete (ScalFmmCoreHandle<double> *) handle;
}
/**
* @brief Init function for distributed version (MPI).
*
*/
#ifdef SCALFMM_USE_MPI
#include "Utils/FMpi.hpp"
#include "FUserKernelDistrEngine.hpp"
extern "C" scalfmm_handle scalfmm_init_distributed( scalfmm_kernel_type KernelType,
scalfmm_algorithm algo,
const MPI_Comm comm){
ScalFmmCoreHandle<double> * handle = new ScalFmmCoreHandle<double>();
//Only the User Defined Kernel version (UDK) is available.
typedef double FReal;
typedef FP2PParticleContainerIndexed<FReal> ContainerClass;
typedef FSimpleLeaf<FReal,ContainerClass> LeafClass;
handle->engine = (new FUserKernelDistrEngine<FReal,LeafClass>(KernelType,algo,comm));
return handle;
}
#endif
/**
* This parts implements all the function defined in FChebInterface.h
* using the Chebyshev classes
......@@ -229,21 +257,17 @@ extern "C" void ChebKernel_M2M(int level, void* parentCell, int childPosition,
parentChebCell->getMultipole(0));
}
extern "C" void ChebKernel_M2L(int level, void* targetCell, void* sourceCell[343], void* inKernel){
extern "C" void ChebKernel_M2L(int level, void* targetCell, const int*neighborPositions,int size,
void** sourceCell, void* inKernel){
//Get our structures
ChebCellStruct * targetCellStruct = reinterpret_cast<ChebCellStruct *>(targetCell);
//get real cheb cell
FChebCell<double,7>* const targetChebCell = targetCellStruct->cell;
//copy to an array of FChebCell
const FChebCell<double,7>* arrayOfChebCell[343];
for(int i=0; i<343 ; ++i){
if(sourceCell[i] != nullptr){
arrayOfChebCell[i] = reinterpret_cast<ChebCellStruct*>(sourceCell[i])->cell;
}
else{
arrayOfChebCell[i] = nullptr;
}
FChebCell<double,7> const ** arrayOfChebCell = new const FChebCell<double,7>*[size];
for(int i=0; i<size ; ++i){
arrayOfChebCell[i] = reinterpret_cast<ChebCellStruct*>(sourceCell[i])->cell;
}
//Identify thread number
......@@ -251,7 +275,8 @@ extern "C" void ChebKernel_M2L(int level, void* targetCell, void* sourceCell[34
//Get the kernel
ChebKernelStruct * inKernelStruct = reinterpret_cast<UserData*>(inKernel)->kernelStruct;
inKernelStruct->kernel[id_thread]->M2L(targetChebCell,arrayOfChebCell,0,level);
inKernelStruct->kernel[id_thread]->M2L(targetChebCell,arrayOfChebCell,neighborPositions,size,level);
delete [] arrayOfChebCell;
}
extern "C" void ChebKernel_L2L(int level, void* parentCell, int childPosition, void* childCell, void* inKernel){
......@@ -314,8 +339,8 @@ extern "C" void ChebKernel_L2P(void* leafCell, FSize nbParticles, const FSize* p
}
void ChebKernel_P2P(FSize nbParticles, const FSize* particleIndexes,
const FSize * sourceParticleIndexes[27],FSize sourceNbPart[27],void* inKernel){
void ChebKernel_P2P(FSize nbParticles, const FSize* particleIndexes, const FSize ** sourceParticleIndexes,FSize* sourceNbPart,
const int * sourcePosition,const int size, void* inKernel){
//Create temporary FSimpleLeaf for target
FP2PParticleContainerIndexed<double>* tempContTarget = new FP2PParticleContainerIndexed<double>();
......@@ -328,9 +353,9 @@ void ChebKernel_P2P(FSize nbParticles, const FSize* particleIndexes,
tempContTarget->push(pos,particleIndexes[i],Phi);
}
//Create 27 FSimpleLeaf for 27 sources
FP2PParticleContainerIndexed<double>* tempContSources[27];
for(int idSource=0; idSource<27 ; ++idSource){
//Create size FSimpleLeaf, one per source neighbor
//(value-initialized so missing neighbors stay nullptr)
FP2PParticleContainerIndexed<double>** tempContSources = new FP2PParticleContainerIndexed<double>*[size]();
for(int idSource=0; idSource<size ; ++idSource){
if(sourceNbPart[idSource] != 0){
//Create container
tempContSources[idSource] = new FP2PParticleContainerIndexed<double>();
......@@ -362,7 +387,7 @@ void ChebKernel_P2P(FSize nbParticles, const FSize* particleIndexes,
//Empty tree coordinate
int coord[3] = {0,0,0};
inKernelStruct->kernel[id_thread]->P2P(FTreeCoordinate(coord),tempContTarget,nullptr,tempContSources,0);
inKernelStruct->kernel[id_thread]->P2P(FTreeCoordinate(coord),tempContTarget,nullptr,tempContSources,sourcePosition,size);
//get back forces & potentials
double * forcesToFill = reinterpret_cast<UserData *>(inKernel)->forcesComputed[id_thread];
......@@ -378,7 +403,7 @@ void ChebKernel_P2P(FSize nbParticles, const FSize* particleIndexes,
//Note that sources are also modified.
//get back sources forces
for(int idSource = 0 ; idSource < 27 ; ++idSource){
for(int idSource = 0 ; idSource < size ; ++idSource){
const FVector<FSize>& indexesSource = tempContSources[idSource]->getIndexes();
const FSize nbPartInSource = sourceNbPart[idSource];
for(int idxSourcePart = 0; idxSourcePart < nbPartInSource ; ++idxSourcePart){
......@@ -390,10 +415,11 @@ void ChebKernel_P2P(FSize nbParticles, const FSize* particleIndexes,
}
//Release memory
for(int idSource=0; idSource<27 ; ++idSource){
for(int idSource=0; idSource<size ; ++idSource){
if(tempContSources[idSource]) delete tempContSources[idSource];
}
delete tempContTarget;
delete [] tempContSources;
}
void ChebCell_reset(int level, long long morton_index, int* tree_position, double* spatial_position, void * userCell,void * inKernel){
......@@ -402,4 +428,78 @@ void ChebCell_reset(int level, long long morton_index, int* tree_position, doubl
chebCell->resetToInitialState();
}
FSize ChebCell_getSize(int level,void * userData, long long morton_index){
//Create a fake cell and ask for its size
FChebCell<double,7>* chebCell = new FChebCell<double,7>();
//then return what size is needed to store a cell
FSize sizeToReturn = chebCell->getSavedSize();
delete chebCell;
return sizeToReturn;
}
/**
* This is what the memory looks like
* : [Morton] [Coord][Multipole] [Local]
* : [1*longlong][3*int][double*VectorSize][double*VectorSize]
*/
void ChebCell_copy(void * userDatas, FSize size, void * memoryAllocated){
//First cast into our struct
ChebCellStruct * cellStruct = reinterpret_cast<ChebCellStruct *>(userDatas);
//Then get the FChebCell
FChebCell<double,7>* chebCell = cellStruct->cell;
//We know for sure there is enough space inside memoryAllocated
FSize cursor = 0;
char * toWrite = static_cast<char * >(memoryAllocated);
//Morton first
long long here;
here = chebCell->getMortonIndex();
memcpy(&toWrite[cursor],&here, sizeof(long long));
cursor += sizeof(long long);
//FTreeCoordinate then
int coord[3] = {chebCell->getCoordinate().getX(),
chebCell->getCoordinate().getY(),
chebCell->getCoordinate().getZ()};
memcpy(&toWrite[cursor],coord, sizeof(int)*3);
cursor += 3*sizeof(int);
//Upward data (multipole)
memcpy(&toWrite[cursor],chebCell->getMultipole(0),chebCell->getVectorSize()*sizeof(double));
cursor += sizeof(double)*chebCell->getVectorSize();
//Downward data (local)
memcpy(&toWrite[cursor],chebCell->getLocal(0),chebCell->getVectorSize()*sizeof(double));
cursor += sizeof(double)*chebCell->getVectorSize();
}
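/**
 * A small worked example of the layout above (hypothetical check, not
 * part of the library): the buffer written by ChebCell_copy occupies
 * exactly the cursor's final value.
 * @code
 * FChebCell<double,7> cell;
 * FSize layoutSize = sizeof(long long) + 3*sizeof(int)
 *                  + 2*cell.getVectorSize()*sizeof(double);
 * // layoutSize is the minimum buffer size ChebCell_copy needs
 * @endcode
 */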
/**
* This is what the memory looks like
* : [Morton] [Coord][Multipole] [Local]
* : [1*longlong][3*int][double*VectorSize][double*VectorSize]
*/
void* ChebCell_restore(int level, void * arrayTobeRead){
FSize cursor = 0;
//First read Morton
long long morton = (static_cast<long long *>(arrayTobeRead))[0];
cursor += sizeof(long long);
//Then Coord :
int * coord = reinterpret_cast<int*>(&(static_cast<char*>(arrayTobeRead))[cursor]);
cursor += sizeof(int)*3;
//Create target Cell and Struct
ChebCellStruct * cellStruct = ChebCellStruct_create(morton,coord);
//Then copy inside this cell the Multipole and the Local
double * mult = reinterpret_cast<double*>(&(static_cast<char*>(arrayTobeRead))[cursor]);
memcpy((cellStruct->cell)->getMultipole(0),mult,((cellStruct->cell)->getVectorSize())*sizeof(double));
cursor += (cellStruct->cell)->getVectorSize()*sizeof(double);
double * local = reinterpret_cast<double*>(&(static_cast<char*>(arrayTobeRead))[cursor]);
memcpy((cellStruct->cell)->getLocal(0),local,((cellStruct->cell)->getVectorSize())*sizeof(double));
cursor += (cellStruct->cell)->getVectorSize()*sizeof(double);
//Done, return the restored cell struct
return cellStruct;
}
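/**
 * A minimal round-trip sketch (hypothetical test code, not part of the
 * library): a buffer written by ChebCell_copy can be fed back to
 * ChebCell_restore to rebuild an equivalent cell.
 * @code
 * int coord[3] = {1,2,3};
 * ChebCellStruct* orig = ChebCellStruct_create(42 /*morton*/, coord);
 * FSize sz = ChebCell_getSize(0, nullptr, 42);
 * char* buf = new char[sz];
 * ChebCell_copy(orig, sz, buf);
 * ChebCellStruct* back =
 *     static_cast<ChebCellStruct*>(ChebCell_restore(0, buf));
 * // back->cell now carries the same Morton index and expansions
 * delete [] buf;
 * @endcode
 */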
#endif
#ifndef FUSERKERNELDISTRENGINE_HPP
#define FUSERKERNELDISTRENGINE_HPP
#include "vector"
#include <algorithm>
/**
 * @file FUserKernelDistrEngine.hpp
 * @brief This file adds MPI features to FUserKernelEngine by
 * reimplementing existing methods and implementing new ones.
 */
#include "Utils/FMpi.hpp"
#include "FUserKernelEngine.hpp"
#include "BalanceTree/FLeafBalance.hpp"
#include "Files/FMpiTreeBuilder.hpp"
#include "Containers/FVector.hpp"
#include "Core/FFmmAlgorithmThreadProc.hpp"
class CoreCellDist : public CoreCell, public FAbstractSendable{
int level;
public:
CoreCellDist() : level(-1){
}
/**
* To save the current cell into a buffer in order to send it.
*/
template <class BufferWriterClass>
void save(BufferWriterClass& buffer) const{
FBasicCell::save<BufferWriterClass>();
buffer << level;
if(user_cell_descriptor.user_get_size){
FSize sizeToSave = user_cell_descriptor.user_get_size(level,CoreCell::getContainer(),getMortonIndex());
char * temp = new char[sizeToSave];
user_cell_descriptor.user_copy_cell(CoreCell::getContainer(),sizeToSave,(void *) temp);
buffer.write(temp,sizeToSave);
delete[] temp;
}
}
/**
* To "create" a cell from an array in order to receive cells.
*/
template <class BufferReaderClass>
void restore(BufferReaderClass& buffer){
FBasicCell::restore<BufferReaderClass>();
buffer >> level;
if(user_cell_descriptor.user_restore_cell){
FSize sizeToSave = user_cell_descriptor.user_get_size(level,CoreCell::getContainer(),getMortonIndex());
char * temp = new char[sizeToSave];
buffer.fillArray(temp,sizeToSave);
CoreCell::setContainer(user_cell_descriptor.user_restore_cell(level,temp));
delete[] temp;
}
}
/**
* @brief Part where we reimplement {de,}serialize{Up,Down}
*/
template <class BufferWriterClass>
void serializeUp(BufferWriterClass& buffer) const {
FSize sizeToSave = user_cell_descriptor.user_get_size(level,CoreCell::getContainer(),getMortonIndex());
char * temp = new char[sizeToSave];
user_cell_descriptor.user_copy_cell(CoreCell::getContainer(),sizeToSave,(void *) temp);
buffer.write(temp,sizeToSave);
delete[] temp;
}
template <class BufferWriterClass>
void serializeDown(BufferWriterClass& buffer) const {
FSize sizeToSave = user_cell_descriptor.user_get_size(level,CoreCell::getContainer(),getMortonIndex());
char * temp = new char[sizeToSave];
user_cell_descriptor.user_copy_cell(CoreCell::getContainer(),sizeToSave,(void *) temp);
buffer.write(temp,sizeToSave);
delete[] temp;
}
template <class BufferReaderClass>
void deserializeUp(BufferReaderClass& buffer) const {
FSize sizeToSave = user_cell_descriptor.user_get_size(level,CoreCell::getContainer(),getMortonIndex());
char * temp = new char[sizeToSave];
buffer.fillArray(temp,sizeToSave);
CoreCell::setContainer(user_cell_descriptor.user_restore_cell(level,temp));
delete[] temp;
}
template <class BufferReaderClass>
void deserializeDown(BufferReaderClass& buffer) const {
FSize sizeToSave = user_cell_descriptor.user_get_size(level,CoreCell::getContainer(),getMortonIndex());
char * temp = new char[sizeToSave];
buffer.fillArray(temp,sizeToSave);
CoreCell::setContainer(user_cell_descriptor.user_restore_cell(level,temp));
delete[] temp;
}
/**
 * @brief Function to get the size of a cell (size of the user data
 * plus size of the internal data).
*/
FSize getSavedSize() const {
FSize toReturn = user_cell_descriptor.user_get_size(level,
CoreCell::getContainer(),
getMortonIndex())
+ FBasicCell::getSavedSize() //Size of storage needed for Basic Cell
+ sizeof(int) //Size of storage needed for this class
+ sizeof(nullptr); //Size of storage needed for the parent class
return toReturn;
}
FSize getSavedSizeUp() const{
FSize toReturn = user_cell_descriptor.user_get_size(level,
CoreCell::getContainer(),
getMortonIndex());
return toReturn;
}
FSize getSavedSizeDown() const{
FSize toReturn = user_cell_descriptor.user_get_size(level,
CoreCell::getContainer(),
getMortonIndex());
return toReturn;
}
};
template<class FReal, class LeafClass>
class FUserKernelDistrEngine: public FUserKernelEngine<FReal,LeafClass>{
private:
using Parent = FUserKernelEngine<FReal,LeafClass>;
//Same as in Parent class.
using ContainerClass = FP2PParticleContainerIndexed<FReal>;
using OctreeClass = FOctree<FReal,CoreCellDist,ContainerClass,LeafClass>;
using CoreKernelClass = CoreKernel<CoreCellDist,ContainerClass>;
/**
 * @brief Class to convert global indices to local indices.
*/
class Indirection{
private:
//Keep track of number of local points
FSize nbPoints;
//Keep track of global indexes
std::vector<FSize> indirection;
//Number of parts per proc before partitioning
std::vector<FSize> oriPartPerProc;
//Number of parts per proc after partitioning
std::vector<FSize> partPerProc;
//Intervals of parts owned by each proc before partitioning
std::vector< std::pair<FSize,FSize> > oriIntervalPerProc;
std::vector< std::vector<FSize> > * mapComm;
//In order to send again following the first partition, we
//store everything to do the Alltoall
std::vector<FSize> orderToSortToSend;
//This will hold the indices of original parts in order to be
//sent directly
std::vector<FSize> initialArray;
std::vector<FSize> currentArray;
std::vector<FSize> currentArraySorted;
std::vector<int> howManyToRecv;
std::vector<int> howManyToSend;
std::vector<int> displRecv;
std::vector<int> displSend;
public:
Indirection() : nbPoints(0), indirection(0), oriPartPerProc(0), partPerProc(0), oriIntervalPerProc(0), mapComm(nullptr), initialArray(0){
}
~Indirection(){
nbPoints=0;
if(mapComm){
delete mapComm;
}
}
/**
*@brief Return the global (original) index
*/
FSize getGlobalInd(FSize localInd) const {
return indirection[localInd];
}
/**
*@brief Return the local (to this process) index
*/
FSize getLocalInd(FSize globalInd) const {
for(auto ite = indirection.cbegin(); ite != indirection.cend() ; ++ite){
if(*ite == globalInd){
return (ite - indirection.cbegin());
}
}
//not found: -1 will cause a segmentation fault if used as an index
return -1;
}
/**
* Get the current number of particles
*/
FSize getLocalSize() const {
return ((FSize) indirection.size());
}
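/**
 * A minimal usage sketch (hypothetical): local index i maps back to
 * the global index that was pushed i-th.
 * @code
 * Indirection ind;
 * ind.addPoint(42);              // global index 42 becomes local 0
 * FSize g = ind.getGlobalInd(0); // g == 42
 * FSize l = ind.getLocalInd(42); // l == 0
 * @endcode
 */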
/**
* Add a global index
*/
void addPoint(FSize globalInd){
indirection.push_back(globalInd);
++nbPoints;
}
//Add multiple global indices
void addArrayPoints(FSize inNbPoint, FSize * globalIndices){
for(FSize ite = 0; ite < inNbPoint ; ++ite){
    indirection.push_back(globalIndices[ite]);
}
nbPoints += inNbPoint;
}
void setNbProc(int inNbProc){
this->oriPartPerProc.resize(inNbProc);
this->partPerProc.resize(inNbProc);
this->oriIntervalPerProc.resize(inNbProc);
this->mapComm = new std::vector<std::vector<FSize> > (inNbProc,std::vector<FSize>(inNbProc));
}
void setOriIntervals(const FMpi::FComm * comm){
FSize acc = 0;
for(int i=0; i<comm->processCount() ; ++i){
oriIntervalPerProc[i] = std::make_pair(acc,acc+oriPartPerProc[i]-1);
//std::cout << "Rank "<< comm->processId() << "(acc= "<< acc <<",acc+oriPartPerproc["<<i<<"]-1= " << acc+oriPartPerProc[i]-1 << ")\n";
acc+=oriPartPerProc[i];
}
}
void setNbPartPerProc(FSize nbPart, const FMpi::FComm * comm){
if(partPerProc.size() < comm->processCount()){
setNbProc(comm->processCount());
}
MPI_Allgather(&nbPart, 1, MPI_LONG_LONG, partPerProc.data() , 1, MPI_LONG_LONG, comm->getComm());
}
void setOriNbPartPerProc(FSize nbPart, const FMpi::FComm * comm){
if(oriPartPerProc.size() < comm->processCount()){
setNbProc(comm->processCount());
}
MPI_Allgather(&nbPart, 1, MPI_LONG_LONG, oriPartPerProc.data(), 1, MPI_LONG_LONG, comm->getComm());
}
template<class ParticleSaver>
void setMap(const FMpi::FComm * comm,ParticleSaver& particles){
FSize * temp_map = new FSize[comm->processCount()*comm->processCount()];
memset(temp_map,0,sizeof(FSize)*comm->processCount()*comm->processCount());
//Each process writes from temp[my_rank*nb_proc] to
//temp[(my_rank+1)*nb_proc - 1]
for(int idPart=0 ; idPart<getCount(comm->processId()) ; ++idPart){
temp_map[comm->processId()*comm->processCount() + particles[idPart].orOwner]++;
}
//Gather the map; each rank's contribution is already stored at
//temp_map[rank*nb_proc], so use MPI_IN_PLACE rather than aliasing
//the send and receive buffers (which MPI forbids).
MPI_Allgather(MPI_IN_PLACE, comm->processCount(), MPI_LONG_LONG,
              temp_map, comm->processCount(), MPI_LONG_LONG, comm->getComm());
for(int idProc=0; idProc<comm->processCount() ; ++idProc){
for(int idProcSource=0; idProcSource<comm->processCount() ; ++idProcSource){
(*mapComm)[idProc][idProcSource] = temp_map[idProc*comm->processCount() + idProcSource];
}
}