Commit 93deeaed authored by Martin Khannouz's avatar Martin Khannouz Committed by Berenger Bramas

Add almost the same data mapping as in starpu mpi explicit.

parent 76f49d8e
......@@ -350,6 +350,31 @@ public:
unsigned int getDataType(){
return typeData[0]; }
/**
 * Reads the position of the next particle from the current position in the file
 * and skips the other values stored for that particle.
 *
 * In binary mode, three FReal are read directly into the memory pointed to by
 * outParticlePositions and the otherDataToRead remaining values are read into tmpVal.
 * In text mode, three values are extracted and stored with setPosition(), then
 * otherDataToRead more values are extracted and dropped.
 *
 * @param outParticlePositions the position of particle to fill (FPoint<FReal> class)
 */
void fillParticle(FPoint<FReal>*const outParticlePositions){
    if(!binaryFile){
        // Text mode: formatted extraction of the three coordinates
        FReal posX, posY, posZ;
        (*this->file) >> posX >> posY >> posZ;
        outParticlePositions->setPosition(posX, posY, posZ);
        // Consume the remaining values of this particle so that the stream
        // is positioned on the next one
        FReal skipped;
        for(FSize idxOther = 0 ; idxOther < otherDataToRead ; ++idxOther){
            (*this->file) >> skipped;
        }
        return;
    }
    // Binary mode: raw copy of the three coordinates into the point
    file->read((char*)(outParticlePositions), sizeof(FReal)*3);
    if(otherDataToRead > 0){
        // The extra values are not returned, they only land in the scratch buffer
        file->read((char*)(this->tmpVal), sizeof(FReal)*otherDataToRead);
    }
}
/**
* Fills a particle from the current position in the file.
*
......@@ -494,6 +519,8 @@ private:
this->centerOfBox.setPosition(x,y,z);
this->boxWidth *= 2;
otherDataToRead = typeData[1] - (unsigned int)(4);
if(typeData[1] < 4)
otherDataToRead = 0;
};
void readBinaryHeader(){
std::cout << " File open in binary mode "<< std::endl;
......
......@@ -580,7 +580,6 @@ public:
// for(int idx = 0 ; idx < nbParticlesInArray ; ++idx){
// particleSaver->push(sortedParticlesArray[idx].particle);
// }
ParticleClass* particlesArrayInLeafOrder = nullptr;
FSize * leavesOffsetInParticles = nullptr;
FSize nbLeaves = 0;
......
......@@ -106,6 +106,53 @@ public:
(getRandom() * boxWidth) + centerOfBox.getY() - boxWidth/2,
(getRandom() * boxWidth) + centerOfBox.getZ() - boxWidth/2);
}
/**
 * Sets a position to the center of the leaf box that has the given Morton index.
 *
 * The computation starts at the center of the global box and applies one shift per
 * level below the root. The index is consumed three bits at a time from the least
 * significant end (bit 0 -> z, bit 1 -> y, bit 2 -> x). The successive shifts are
 * independent from each other, so they are applied starting with the leaf level
 * (smallest shift), which is simpler.
 *
 * @param inParticlePositions the position to fill (output)
 * @param idx the Morton index of the target leaf
 * @param treeHeight the height of the tree (the leaves are at level treeHeight-1);
 *        0 is handled like 1 and yields the center of the global box
 */
void fillParticleAtMortonIndex(FPoint<FReal>*const inParticlePositions, MortonIndex idx, unsigned int treeHeight){
    // treeHeight is unsigned: without this guard treeHeight-1 would wrap around for 0,
    // giving an invalid shift count and a (nearly) endless loop
    const unsigned int nbLevelsBelowRoot = (treeHeight > 0 ? treeHeight - 1 : 0);
    const MortonIndex mask = 0x1LL;
    // Width of a box at the leaf level (64-bit shift so that large heights do not overflow an int)
    const FReal leafWidth = boxWidth / FReal(1LL << nbLevelsBelowRoot);
    // Shift relative to the parent center: half the width of the child box
    FReal currentOffset = leafWidth / 2.0;
    // Start from the center of the global box
    FReal x = centerOfBox.getX();
    FReal y = centerOfBox.getY();
    FReal z = centerOfBox.getZ();
    // Move from the center of a parent to the center of its child once per level
    for(unsigned int idxLevel = 0; idxLevel < nbLevelsBelowRoot; ++idxLevel)
    {
        // 1st bit: z direction
        const bool zPositive = ((idx & mask) != 0);
        idx >>= 1;
        // 2nd bit: y direction
        const bool yPositive = ((idx & mask) != 0);
        idx >>= 1;
        // 3rd bit: x direction
        const bool xPositive = ((idx & mask) != 0);
        idx >>= 1;

        x += (xPositive ? currentOffset : -currentOffset);
        y += (yPositive ? currentOffset : -currentOffset);
        z += (zPositive ? currentOffset : -currentOffset);

        // The shift doubles each time we go one level up the tree
        currentOffset *= 2;
    }
    inParticlePositions->setPosition( x, y, z);
}
/** Get a random number between 0 & 1 */
FReal getRandom() const{
......
This diff is collapsed.
This diff is collapsed.
// Keep in private GIT
#include <iostream>
#include <fstream>
#include <vector>
using namespace std;
#include "../../Src/Utils/FGlobal.hpp"
#include "../../Src/GroupTree/Core/FGroupTree.hpp"
#include "../../Src/GroupTree/Core/FGroupTreeBerenger.hpp"
#include "../../Src/Components/FSimpleLeaf.hpp"
#include "../../Src/Containers/FVector.hpp"
......@@ -36,15 +38,21 @@ using namespace std;
#include "../../Src/Files/FFmaGenericLoader.hpp"
#include "../../Src/Core/FFmmAlgorithm.hpp"
std::vector<MortonIndex> getMortonIndex(const char* const mapping_filename);
int main(int argc, char* argv[]){
setenv("STARPU_NCPU","1",1);
const FParameterNames LocalOptionBlocSize {
{"-bs"},
"The size of the block of the blocked tree"
};
const FParameterNames Mapping {
{"-map"} ,
"mapping  \\o/."
};
FHelpDescribeAndExit(argc, argv, "Test the blocked tree by counting the particles.",
FParameterDefinitions::OctreeHeight, FParameterDefinitions::NbParticles,
FParameterDefinitions::OctreeSubHeight, LocalOptionBlocSize);
FParameterDefinitions::OctreeSubHeight, FParameterDefinitions::InputFile, LocalOptionBlocSize, Mapping);
int provided = 0;
//MPI_Init_thread(&argc,&argv, MPI_THREAD_SERIALIZED, &provided);
......@@ -58,7 +66,7 @@ int main(int argc, char* argv[]){
typedef FGroupTestParticleContainer<FReal> GroupContainerClass;
typedef FGroupTree< FReal, GroupCellClass, GroupCellSymbClass, GroupCellUpClass, GroupCellDownClass,
typedef FGroupTreeBerenger< FReal, GroupCellClass, GroupCellSymbClass, GroupCellUpClass, GroupCellDownClass,
GroupContainerClass, 0, 1, long long int> GroupOctreeClass;
typedef FStarPUAllCpuCapacities<FTestKernels< GroupCellClass, GroupContainerClass >> GroupKernelClass;
typedef FStarPUCpuWrapper<typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass> GroupCpuWrapper;
......@@ -75,13 +83,14 @@ int main(int argc, char* argv[]){
// Get params
const int NbLevels = FParameters::getValue(argc,argv,FParameterDefinitions::OctreeHeight.options, 5);
const int groupSize = FParameters::getValue(argc,argv,LocalOptionBlocSize.options, 250);
#ifdef STARPU_USE_MPI
cout << "MPI \\o/" <<endl;
#else
const int groupSize = FParameters::getValue(argc,argv,LocalOptionBlocSize.options, 8);
const char* const mapping_filename = FParameters::getStr(argc,argv,Mapping.options, "mapping");
std::vector<MortonIndex> distributedMortonIndex = getMortonIndex(mapping_filename);
#ifndef STARPU_USE_MPI
cout << "Pas de mpi -_-\" " << endl;
#endif
//#define LOAD_FILE
#define LOAD_FILE
#ifndef LOAD_FILE
const FSize NbParticles = FParameters::getValue(argc,argv,FParameterDefinitions::NbParticles.options, FSize(20));
FRandomLoader<FReal> loader(NbParticles, 1.0, FPoint<FReal>(0,0,0), 0);
......@@ -98,21 +107,16 @@ int main(int argc, char* argv[]){
FTestParticleContainer<FReal> allParticles;
for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
FPoint<FReal> particlePosition;
#ifndef LOAD_FILE
loader.fillParticle(&particlePosition);
#else
FReal ph;
loader.fillParticle(&particlePosition, &ph);
#endif
loader.fillParticle(&particlePosition);//Same with file or not
allParticles.push(particlePosition);
tree.insert(particlePosition);
}
// Put the data into the tree
//GroupOctreeClass groupedTree(NbLevels, groupSize, &tree);
//GroupOctreeClass groupedTree(NbLevels, loader.getBoxWidth(), loader.getCenterOfBox(), groupSize, &allParticles);
GroupOctreeClass groupedTree(NbLevels, loader.getBoxWidth(), loader.getCenterOfBox(), groupSize, &allParticles, false, distributedMortonIndex);
//GroupOctreeClass groupedTree(NbLevels, loader.getBoxWidth(), loader.getCenterOfBox(), groupSize, &allParticles, false, true);
GroupOctreeClass groupedTree(NbLevels, loader.getBoxWidth(), loader.getCenterOfBox(), groupSize, &allParticles, false, true, 0.2);
//groupedTree.printInfoBlocks();
//GroupOctreeClass groupedTree(NbLevels, loader.getBoxWidth(), loader.getCenterOfBox(), groupSize, &allParticles, false, true, 0.2);
groupedTree.printInfoBlocks();
// Check tree structure at leaf level
groupedTree.forEachCellLeaf<GroupContainerClass>([&](GroupCellClass gcell, GroupContainerClass* gleaf){
const ContainerClass* src = tree.getLeafSrc(gcell.getMortonIndex());
......@@ -128,18 +132,24 @@ int main(int argc, char* argv[]){
// Run the algorithm
GroupKernelClass groupkernel;
GroupAlgorithm groupalgo(&groupedTree,&groupkernel);
GroupAlgorithm groupalgo(&groupedTree,&groupkernel, distributedMortonIndex);
groupalgo.execute();
// Usual algorithm
KernelClass kernels; // FTestKernels FBasicKernels
FmmClass algo(&tree,&kernels); //FFmmAlgorithm FFmmAlgorithmThread
algo.execute();
int rank = groupalgo.getRank();
for(int i = 0; i < groupedTree.getHeight(); ++i)
{
if(groupedTree.getNbCellGroupAtLevel(i) < groupalgo.getNProc())
std::cout << "Error at level " << i << std::endl;
}
// Validate the result
for(int idxLevel = 2 ; idxLevel < groupedTree.getHeight() ; ++idxLevel){
for(int idxGroup = 0 ; idxGroup < groupedTree.getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
if(groupalgo.isDataOwned(idxGroup, groupedTree.getNbCellGroupAtLevel(idxLevel))){
GroupOctreeClass::CellGroupClass* currentCells = groupedTree.getCellGroup(idxLevel, idxGroup);
//if(groupalgo.isDataOwned(idxGroup, groupedTree.getNbCellGroupAtLevel(idxLevel))){
if(groupalgo.isDataOwnedBerenger(groupedTree.getCellGroup(idxLevel, idxGroup)->getStartingIndex(), idxLevel)){
GroupOctreeClass::CellGroupClass* currentCells = groupedTree.getCellGroup(idxLevel, idxGroup);
currentCells->forEachCell([&](GroupCellClass gcell){
const CellClass* cell = tree.getCell(gcell.getMortonIndex(), idxLevel);
if(cell == nullptr){
......@@ -160,7 +170,8 @@ int main(int argc, char* argv[]){
{
int idxLevel = groupedTree.getHeight()-1;
for(int idxGroup = 0 ; idxGroup < groupedTree.getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
if(groupalgo.isDataOwned(idxGroup, groupedTree.getNbCellGroupAtLevel(idxLevel))){
//if(groupalgo.isDataOwned(idxGroup, groupedTree.getNbCellGroupAtLevel(idxLevel))){
if(groupalgo.isDataOwnedBerenger(groupedTree.getCellGroup(groupedTree.getHeight()-1, idxGroup)->getStartingIndex(), groupedTree.getHeight()-1)){
GroupOctreeClass::ParticleGroupClass* particleGroup = groupedTree.getParticleGroup(idxGroup);
GroupOctreeClass::CellGroupClass* cellGroup = groupedTree.getCellGroup(idxLevel, idxGroup);
cellGroup->forEachCell([&](GroupCellClass cell){
......@@ -183,3 +194,27 @@ int main(int argc, char* argv[]){
}
return 0;
}
/**
 * Reads a mapping file and returns the Morton index interval of every process.
 *
 * Expected content (whitespace separated): the number of processes, followed by
 * one "start end" pair of Morton indexes per process.
 *
 * @param mapping_filename path of the mapping file
 * @return the pairs flattened as {start0, end0, start1, end1, ...}. The vector is
 *         empty if the file cannot be opened or if the number of processes cannot
 *         be read. If the file ends or is malformed before all the announced pairs
 *         are read, only the complete pairs read so far are returned.
 */
std::vector<MortonIndex> getMortonIndex(const char* const mapping_filename)
{
    std::vector<MortonIndex> ret;
    std::ifstream fichier(mapping_filename, std::ios::in); // open the file for reading
    if(!fichier) // the file could not be opened
    {
        std::cerr << "Impossible d'ouvrir le fichier !" << std::endl;
        return ret;
    }

    int nbProcess = 0;
    if(!(fichier >> nbProcess))
    {
        std::cerr << "Unable to read the number of processes from the mapping file" << std::endl;
        return ret;
    }

    for(int idxProcess = 0; idxProcess < nbProcess; ++idxProcess)
    {
        MortonIndex start = 0, end = 0;
        // Once an extraction fails the stream stays in a failed state and the
        // targets are no longer written, so stop instead of pushing garbage
        if(!(fichier >> start >> end))
        {
            std::cerr << "Mapping file is truncated or malformed: read "
                      << idxProcess << " interval(s) out of " << nbProcess << std::endl;
            break;
        }
        ret.push_back(start);
        ret.push_back(end);
    }
    // The file is closed by the std::ifstream destructor
    return ret;
}
......@@ -129,12 +129,15 @@ int main(int argc, char* argv[]){
}
FLOG(std::cout << "My last index is " << leftLimite << "\n");
FLOG(std::cout << "My left limite is " << myLeftLimite << "\n");
std::cout << "My last index is (" << mpiComm.global().processId() << ") " << leftLimite << "\n";
std::cout << "My left limite is (" << mpiComm.global().processId() << ") " << myLeftLimite << "\n";
std::cout << "Size (" << mpiComm.global().processId() << ") " << allParticles.getNbParticles() << "\n";
// Put the data into the tree
GroupOctreeClass groupedTree(NbLevels, loader.getBoxWidth(), loader.getCenterOfBox(), groupSize,
&allParticles, true, leftLimite);
groupedTree.printInfoBlocks();
//groupedTree.printInfoBlocks();
// Run the algorithm
GroupKernelClass groupkernel;
......
......@@ -9,10 +9,10 @@
#+TAGS: noexport(n)
* Implicit MPI in Scalfmm using StarPU
* Implicit MPI in Scalfmm using StarPU
** Installation
*** Requirement
#+BEGIN_SRC sh :exports code :eval never
#+BEGIN_SRC sh
sudo pacman -S openmpi fftw blas lapack
#+END_SRC
......@@ -250,7 +250,7 @@ Now let's look at the function that build handles.
There are at least two new things: the variable "where", which tells whether a handle is in the node's memory or somewhere else, and the variable "registeringNode", which tells on which node the block should be registered.
*** Checking result
Now I have a beautiful mapping function how on earth am I suppose to check if the result is correct ?
Now that I have a beautiful mapping function, how on earth am I supposed to check whether the result is correct?
To answer this question, I need to clarify a little how StarPU MPI works (from what I understood of the documentation).
When a node registers a piece of data, every time a task writes this data somewhere else, the fresh data should be written back to the node that registered it.
So each node should have an up-to-date value in its own cells.
......@@ -311,3 +311,101 @@ For those who prefer the C code, there it is.
}
}
#+END_SRC
*** Bérenger's Mapping
The next goal is to try to match the data mapping used by Bérenger in the explicit MPI algorithm,
knowing that the same data mapping should imply the same task mapping.
**** Same leaf mapping
Because the explicit MPI algorithm first sorts the particles before knowing which node works on which data, I chose to dump the data related to an execution,
that is, the particle data and the Morton indexes handled by each MPI node.
Then, in the implicit version, the software reads those files to reproduce the same data mapping.
**** Same cell mapping
Because reproducing the same data mapping at the leaf level doesn't mean that all the cells in the other levels will be mapped on the exact same node, I had to reproduce the algorithm that splits cells among nodes.
This algorithm is described in section 4.3.1 of Bérenger's thesis.
But this algorithm isn't aware of the groups of the grouped tree, and because the implicit MPI code splits data only by groups of the grouped tree, I had to make slight changes.
Keep in mind that MPI data transfers are managed by StarPU MPI and use starpu_data_handle, and that in the implicit algorithm a data handle corresponds to a group of the grouped tree.
So I chose that if the first cell of a group is mapped on node i, the whole group is mapped on node i.
That's when another problem comes up: the MPI quicksort algorithm makes sure that all the particles of a leaf are on the same node, but it does not make sure that all the cells of a group are mapped on the same MPI node.
And even if it did, what about the next level of the tree? Does the distributed quicksort make sure that the groups at this level are complete and on the same node? Firstly, it isn't the case, and secondly, it would have been too hard.
So, is it possible to reproduce the exact same data mapping as Bérenger does in the explicit version?
Well, not the exact same mapping. But it is still possible to obtain the same mapping at a specific level. And because the first levels (the ones close to the root) imply far less work than the leaf level, it matters less if we can't match the mapping at these levels.
For example, if there is the same number of particles per leaf and every Morton index exists (so it's a perfect tree), it is possible to tell whether a specific level has the same mapping as Bérenger's mapping.
Why?
If all Morton indexes are used, each MPI node always works on the same part of the tree, which is easily predictable, and with a perfect tree it is easier to make sure that the groups of the grouped tree are created the same way as in the explicit MPI code.
To check whether a mapping error could appear at a level, there is one simple rule: the number of groups at level i must be divisible by the number of MPI nodes.
There is no issue with partially filled groups, because the tree is perfect.
These are the values I tried, and they seem to work pretty well:
- 8 MPI nodes (2^3).
- A group size of 8 in the grouped tree (because it is an octree, it looked like a good idea).
- ((2^(tree height-1))/number of MPI nodes) particles to generate for each node.
- A tree higher than 2 levels. Usually, it was 5.
Now only one problem remains: we need to generate a particle for each Morton index.
It seemed difficult at first, but after discussing it with Quentin, it turned out to be pretty easy, so here is the code.
#+BEGIN_SRC C
//Set the position to the center of the leaf box that has the Morton index idx
void fillParticleAtMortonIndex(FPoint<FReal>*const inParticlePositions, MortonIndex idx, unsigned int treeHeight){
MortonIndex mask = 0x1LL;
//Box width at the leaf level
FReal leafWidth = boxWidth / FReal(1<<(treeHeight-1));
//Offset from the previous center, which is half the box width
FReal currentOffset = leafWidth / 2.0;
//Start from the center of the box
FReal x, y, z;
x = centerOfBox.getX();
y = centerOfBox.getY();
z = centerOfBox.getZ();
//One iteration per level below the root, starting with the leaf level
for(unsigned int i = 0; i < treeHeight-1; ++i)
{
bool x_offset, y_offset, z_offset;
//Lowest bit gives the z direction
z_offset = (idx & mask);
idx >>= 1;
//Next bit gives the y direction
y_offset = (idx & mask);
idx >>= 1;
//Next bit gives the x direction
x_offset = (idx & mask);
idx >>= 1;
//A set bit moves the center forward along the axis, a cleared bit moves it backward
if(x_offset)
x += currentOffset;
else
x -= currentOffset;
if(y_offset)
y += currentOffset;
else
y -= currentOffset;
if(z_offset)
z += currentOffset;
else
z -= currentOffset;
//Double the offset as we go up the tree, because the boxes get larger
currentOffset *= 2;
}
inParticlePositions->setPosition( x, y, z);
}
#+END_SRC
Let's have a quick look at the function. We want to insert a particle at a specific Morton index.
I use a two-dimensional grid because it's easier to draw.
Let's say the Morton index is 7, which gives 0111 in binary.
To find the center of the 7th box, we first find the center of its father.
The first bit is 0, so we subtract one fourth of the box width from the x coordinate.
The second bit is 1, so we add one fourth of the box width to the y coordinate.
Now we have the center of the sub-box where the 7th box is, so we keep going.
Because the third and the fourth bits are 1, we respectively add one eighth of the box width to the x and y coordinates.
#+CAPTION:
#+NAME: fig:MortonForNoobs
[[./morton_box_center.png]]
As you can see in the picture, it is possible to start with either the least or the most significant bit; the function fillParticleAtMortonIndex starts with the least significant one.
**** Task mapping
//TODO check that blody DAG !
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment