// testBlockedImplicitAlgorithm.cpp

// Keep in private GIT
#include <iostream>
#include <fstream>
#include <vector>

#include <mpi.h>

using namespace std;

#include "../../Src/Utils/FGlobal.hpp"

#include "../../Src/GroupTree/Core/FGroupTree.hpp"

#include "../../Src/Components/FSimpleLeaf.hpp"
#include "../../Src/Containers/FVector.hpp"


#include "../../Src/Utils/FMath.hpp"
#include "../../Src/Utils/FMemUtils.hpp"
#include "../../Src/Utils/FParameters.hpp"

#include "../../Src/Files/FRandomLoader.hpp"

#include "../../Src/GroupTree/Core/FGroupSeqAlgorithm.hpp"
#include "../../Src/GroupTree/Core/FGroupTaskStarpuImplicitAlgorithm.hpp"
#include "../../Src/GroupTree/StarPUUtils/FStarPUKernelCapacities.hpp"

#include "../../Src/GroupTree/StarPUUtils/FStarPUCpuWrapper.hpp"
#include "../../Src/GroupTree/Core/FP2PGroupParticleContainer.hpp"
#include "../../Src/GroupTree/Core/FGroupTaskAlgorithm.hpp"

#include "../../Src/BalanceTree/FLeafBalance.hpp"

#include "../../Src/Utils/FParameterNames.hpp"

#include "../../Src/Components/FTestParticleContainer.hpp"
#include "../../Src/Components/FTestCell.hpp"
#include "../../Src/Components/FTestKernels.hpp"
#include "../../Src/GroupTree/TestKernel/FGroupTestParticleContainer.hpp"
#include "../../Src/GroupTree/TestKernel/FTestCellPOD.hpp"

#include "../../Src/Files/FFmaGenericLoader.hpp"
#include "../../Src/Core/FFmmAlgorithm.hpp"

    typedef double FReal;

    // Initialize the types
    typedef FTestCellPODCore  GroupCellSymbClass;
    typedef FTestCellPODData  GroupCellUpClass;
    typedef FTestCellPODData  GroupCellDownClass;
    typedef FTestCellPOD      GroupCellClass;


    typedef FGroupTestParticleContainer<FReal>                                GroupContainerClass;
    typedef FGroupTree< FReal, GroupCellClass, GroupCellSymbClass, GroupCellUpClass, GroupCellDownClass,
            GroupContainerClass, 0, 1, long long int>  GroupOctreeClass;
    typedef FStarPUAllCpuCapacities<FTestKernels< GroupCellClass, GroupContainerClass >>  GroupKernelClass;
    typedef FStarPUCpuWrapper<typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass> GroupCpuWrapper;
    typedef FGroupTaskStarPUImplicitAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupCpuWrapper > GroupAlgorithm;

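    // Classic (non-blocked) FMM types, used to build a reference octree and check the blocked result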
    typedef FTestCell                   CellClass;
    typedef FTestParticleContainer<FReal>      ContainerClass;
    typedef FSimpleLeaf<FReal, ContainerClass >                     LeafClass;
    typedef FOctree<FReal, CellClass, ContainerClass , LeafClass >  OctreeClass;
    typedef FTestKernels< CellClass, ContainerClass >         KernelClass;

    // FFmmAlgorithmTask FFmmAlgorithmThread
    typedef FFmmAlgorithm<OctreeClass, CellClass, ContainerClass, KernelClass, LeafClass >     FmmClass;
//#define LOAD_FILE
#ifndef LOAD_FILE
	typedef FRandomLoader<FReal> LoaderClass;
#else
	typedef FFmaGenericLoader<FReal> LoaderClass;
#endif
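// Helpers defined after main():
//  - timeAverage: gathers the per-rank elapsed times on rank 0 and prints the largest one
//  - sortParticle: Morton-sorts the particles and fills the per-level group sizes and the
//    per-process Morton intervals (distributedMortonIndex)
//  - createNodeRepartition: propagates the leaf-level Morton intervals to the upper levels
//  - getNbParticlesPerNode: splits the particle count evenly across the MPI ranks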
void timeAverage(int mpi_rank, int nproc, double elapsedTime);
void sortParticle(FPoint<FReal> * allParticlesToSort, int treeHeight, int groupSize, vector<vector<int>> & sizeForEachGroup, vector<MortonIndex> & distributedMortonIndex, LoaderClass& loader, int nproc);
void createNodeRepartition(std::vector<MortonIndex> distributedMortonIndex, std::vector<std::vector<std::vector<MortonIndex>>>& nodeRepartition, int nproc, int treeHeight);
FSize getNbParticlesPerNode(FSize mpi_count, FSize mpi_rank, FSize total);

int main(int argc, char* argv[]){
    const FParameterNames LocalOptionBlocSize {
        {"-bs"},
        "The size of the block of the blocked tree"
    };
    FHelpDescribeAndExit(argc, argv, "Test the blocked tree by counting the particles.",
                         FParameterDefinitions::OctreeHeight, FParameterDefinitions::NbParticles,
                         FParameterDefinitions::OctreeSubHeight, FParameterDefinitions::InputFile, LocalOptionBlocSize);

    // Get params
    const int NbLevels      = FParameters::getValue(argc,argv,FParameterDefinitions::OctreeHeight.options, 5);
    const int groupSize      = FParameters::getValue(argc,argv,LocalOptionBlocSize.options, 8);

#ifndef STARPU_USE_MPI
		cout << "No MPI support -_-\" " << endl;
#endif
	int mpi_rank, nproc;
    FMpi mpiComm(argc,argv);
	mpi_rank = mpiComm.global().processId();
	nproc = mpiComm.global().processCount();


#ifndef LOAD_FILE
    const FSize NbParticles   = FParameters::getValue(argc,argv,FParameterDefinitions::NbParticles.options, FSize(10000));
#else
    // Load the particles
    const char* const filename = FParameters::getStr(argc,argv,FParameterDefinitions::InputFile.options, "../Data/test20k.fma");
    LoaderClass loader(filename);
    FAssertLF(loader.isOpen());
	const FSize NbParticles   = loader.getNumberOfParticles();
#endif

	FPoint<FReal> * allParticlesToSort = new FPoint<FReal>[NbParticles];

	//Fill particles
#ifndef LOAD_FILE
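	// Each rank regenerates the complete particle set (one deterministic chunk per rank),
	// so every process ends up with the same global set and builds identical trees.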
	{
		FSize idxPart = 0;
		for(int i = 0; i < mpiComm.global().processCount(); ++i){
			FSize NbParticlesPerNode = getNbParticlesPerNode(nproc, i, NbParticles);
			LoaderClass loader(NbParticlesPerNode, 1.0, FPoint<FReal>(0,0,0), i);
			FAssertLF(loader.isOpen());
			for(FSize j= 0 ; j < NbParticlesPerNode ; ++j){
				loader.fillParticle(&allParticlesToSort[idxPart]);//Same with file or not
				++idxPart;
			}
		}
	}
	LoaderClass loader(NbParticles, 1.0, FPoint<FReal>(0,0,0));
#else
    for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
        loader.fillParticle(&allParticlesToSort[idxPart]);//Same with file or not
    }
#endif

    // Usual octree
    OctreeClass tree(NbLevels, FParameters::getValue(argc,argv,FParameterDefinitions::OctreeSubHeight.options, 2),
                     loader.getBoxWidth(), loader.getCenterOfBox());
	std::vector<MortonIndex> distributedMortonIndex;
	vector<vector<int>> sizeForEachGroup;
    FTestParticleContainer<FReal> allParticles;
	sortParticle(allParticlesToSort, NbLevels, groupSize, sizeForEachGroup, distributedMortonIndex, loader, nproc);
    for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
        allParticles.push(allParticlesToSort[idxPart]);
        tree.insert(allParticlesToSort[idxPart]);
	}
	delete[] allParticlesToSort;
	allParticlesToSort = nullptr;
    // Put the data into the blocked tree
	
	//GroupOctreeClass groupedTree(NbLevels, loader.getBoxWidth(), loader.getCenterOfBox(), groupSize, &allParticles, false);
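	// Build the blocked tree with the per-level group sizes computed by sortParticle,
	// so that the block boundaries follow the per-process Morton intervals.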
	GroupOctreeClass groupedTree(NbLevels, loader.getBoxWidth(), loader.getCenterOfBox(), groupSize, &allParticles, sizeForEachGroup, false);

	// Check the tree structure at the leaf level
    groupedTree.forEachCellLeaf<GroupContainerClass>([&](GroupCellClass gcell, GroupContainerClass* gleaf){
        const ContainerClass* src = tree.getLeafSrc(gcell.getMortonIndex());
        if(src == nullptr){
            std::cout << "[PartEmpty] Error cell should not exist " << gcell.getMortonIndex() << "\n";
        }
        else {
            if(src->getNbParticles() != gleaf->getNbParticles()){
                std::cout << "[Part] Nb particles is different at index " << gcell.getMortonIndex() << " is " << gleaf->getNbParticles() << " should be " << src->getNbParticles() << "\n";
            }
        }
    });
	
    // Run the algorithm
    GroupKernelClass groupkernel;
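    // Implicit MPI algorithm: distributedMortonIndex tells each rank which Morton interval it
    // owns; the data transfers between nodes are left to StarPU-MPI.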
    GroupAlgorithm groupalgo(&groupedTree,&groupkernel, distributedMortonIndex);
	mpiComm.global().barrier();
	FTic timerExecute;
	groupalgo.execute();
	mpiComm.global().barrier();
	double elapsedTime = timerExecute.tacAndElapsed();
	timeAverage(mpi_rank, nproc, elapsedTime);
	
    // Usual algorithm
    KernelClass kernels;            // FTestKernels FBasicKernels
    FmmClass algo(&tree,&kernels);  //FFmmAlgorithm FFmmAlgorithmThread
    algo.execute();
	int rank = groupalgo.getRank();
	for(int i = 2; i < groupedTree.getHeight(); ++i){ // No task at level 0 and 1
		if(groupedTree.getNbCellGroupAtLevel(i) < groupalgo.getNProc() && rank == 0){
			std::cout << "Error at level " << i << ": fewer cell groups than MPI processes" << std::endl;
		}
	}

    // Validate the result
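	// Compare every cell group owned by this rank with the matching cells of the reference octree.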
	for(int idxLevel = 2 ; idxLevel < groupedTree.getHeight() ; ++idxLevel){
		for(int idxGroup = 0 ; idxGroup < groupedTree.getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
			if(groupalgo.isDataOwnedBerenger(groupedTree.getCellGroup(idxLevel, idxGroup)->getStartingIndex(), idxLevel)){
				GroupOctreeClass::CellGroupClass* currentCells = groupedTree.getCellGroup(idxLevel, idxGroup);
				currentCells->forEachCell([&](GroupCellClass gcell){
						const CellClass* cell = tree.getCell(gcell.getMortonIndex(), idxLevel);
						if(cell == nullptr){
							std::cout << "[Empty node(" << rank << ")] Error cell should not exist " << gcell.getMortonIndex() << "\n";
						}
						else {
							if(gcell.getDataUp() != cell->getDataUp()){
								std::cout << "[Up node(" << rank << ")] Up is different at index " << gcell.getMortonIndex() << " level " << idxLevel << " is " << gcell.getDataUp() << " should be " << cell->getDataUp() << "\n";
							}
							if(gcell.getDataDown() != cell->getDataDown()){
								std::cout << "[Down node(" << rank << ")] Down is different at index " << gcell.getMortonIndex() << " level " << idxLevel << " is " << gcell.getDataDown() << " should be " << cell->getDataDown() << "\n";
							}
						}
				});
			}
		}
	}
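	// Leaf-level check: with the test kernel, a cell's up value must equal the number of particles
	// in its leaf, and after a complete FMM pass each particle's down value must equal N-1.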
	{
		int idxLevel = groupedTree.getHeight()-1;
		for(int idxGroup = 0 ; idxGroup < groupedTree.getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
			if(groupalgo.isDataOwnedBerenger(groupedTree.getCellGroup(groupedTree.getHeight()-1, idxGroup)->getStartingIndex(), groupedTree.getHeight()-1)){
				GroupOctreeClass::ParticleGroupClass* particleGroup = groupedTree.getParticleGroup(idxGroup); 
				GroupOctreeClass::CellGroupClass* cellGroup = groupedTree.getCellGroup(idxLevel, idxGroup);
				cellGroup->forEachCell([&](GroupCellClass cell){
					MortonIndex midx = cell.getMortonIndex();
					const int leafIdx = particleGroup->getLeafIndex(midx);
					GroupContainerClass leaf = particleGroup->template getLeaf<GroupContainerClass>(leafIdx);
					const FSize nbPartsInLeaf = leaf.getNbParticles();
					if(cell.getDataUp() != nbPartsInLeaf){
						std::cout << "[P2M node(" << rank << ")] Error a Cell has " << cell.getDataUp() << " (it should be " << nbPartsInLeaf << ")\n";
					}
					const long long int* dataDown = leaf.getDataDown();
					for(FSize idxPart = 0 ; idxPart < nbPartsInLeaf ; ++idxPart){
						if(dataDown[idxPart] != loader.getNumberOfParticles()-1){
							std::cout << "[Full node(" << rank << ")] Error a particle has " << dataDown[idxPart] << " (it should be " << (loader.getNumberOfParticles()-1) << ") at index " << cell.getMortonIndex() << "\n";
						}
					}
				});
			}
		}
	}
    return 0;
}
void timeAverage(int mpi_rank, int nproc, double elapsedTime)
{
	if(mpi_rank == 0)
	{
		// Collect the elapsed time of every rank and keep the largest one (the slowest rank)
		double maxElapsedTime = elapsedTime;
		for(int i = 1; i < nproc; ++i)
		{
			double tmp;
			MPI_Recv(&tmp, 1, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
			if(tmp > maxElapsedTime)
				maxElapsedTime = tmp;
		}
		std::cout << "Average time per node (implicit Cheby) : " << maxElapsedTime << "s" << std::endl;
	}
	else
	{
		MPI_Send(&elapsedTime, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
	}
	MPI_Barrier(MPI_COMM_WORLD);
}

void sortParticle(FPoint<FReal> * allParticles, int treeHeight, int groupSize, vector<vector<int>> & sizeForEachGroup, vector<MortonIndex> & distributedMortonIndex, LoaderClass& loader, int nproc)
{
	// Structure used for sorting
	struct ParticleSortingStruct{
		FPoint<FReal> position;
		MortonIndex mindex;
	};
	// Create an array of this structure, then fill it
	const FSize nbParticles = loader.getNumberOfParticles();
	ParticleSortingStruct* particlesToSort = new ParticleSortingStruct[nbParticles];
	for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart){
		const FTreeCoordinate host = FCoordinateComputer::GetCoordinateFromPosition<FReal>(loader.getCenterOfBox(), loader.getBoxWidth(),
				treeHeight,
				allParticles[idxPart]);
		const MortonIndex particleIndex = host.getMortonIndex(treeHeight-1);
		particlesToSort[idxPart].mindex = particleIndex;
		particlesToSort[idxPart].position = allParticles[idxPart];
	}

	// Sort the new array by Morton index
	FQuickSort<ParticleSortingStruct, FSize>::QsOmp(particlesToSort, nbParticles, [](const ParticleSortingStruct& v1, const ParticleSortingStruct& v2){
			return v1.mindex <= v2.mindex;
			});
	// Copy everything back, in order, into the original array
	for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart){
		allParticles[idxPart] = particlesToSort[idxPart].position;
	}

	// Count the number of leaves
	sizeForEachGroup.resize(treeHeight);
	MortonIndex previousLeaf = -1;
	int numberOfLeaf = 0;
	for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart)
	{
		if(particlesToSort[idxPart].mindex != previousLeaf)
		{
			previousLeaf = particlesToSort[idxPart].mindex;
			++numberOfLeaf;
		}
	}

	// Compute the group sizes at the leaf level
	FLeafBalance balancer;
	for(int processId = 0; processId < nproc; ++processId)
	{
		FSize size_last;
		FSize countGroup;
		FSize leafOnProcess = balancer.getRight(numberOfLeaf, nproc, processId) - balancer.getLeft(numberOfLeaf, nproc, processId);
		size_last = leafOnProcess%groupSize;
		countGroup = (leafOnProcess - size_last)/groupSize;
		for(FSize i = 0; i < countGroup; ++i)
			sizeForEachGroup[treeHeight-1].push_back(groupSize);
		if(size_last > 0)
			sizeForEachGroup[treeHeight-1].push_back((int)size_last);
	}

	// Compute the working intervals at the leaf level
	previousLeaf = -1;
	int countLeaf = 0;
	int processId = 0;
	FSize leafOnProcess = balancer.getRight(numberOfLeaf, nproc, 0) - balancer.getLeft(numberOfLeaf, nproc, 0);
	distributedMortonIndex.push_back(previousLeaf);
	for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart)
	{
		if(particlesToSort[idxPart].mindex != previousLeaf)
		{
			previousLeaf = particlesToSort[idxPart].mindex;
			++countLeaf;
			if(countLeaf == leafOnProcess)
			{
				distributedMortonIndex.push_back(previousLeaf);
				distributedMortonIndex.push_back(previousLeaf);
				countLeaf = 0;
				++processId;
				leafOnProcess = balancer.getRight(numberOfLeaf, nproc, processId) - balancer.getLeft(numberOfLeaf, nproc, processId);
			}
		}
	}
	distributedMortonIndex.push_back(particlesToSort[nbParticles - 1].mindex);

	// Compute the working intervals at every level
	std::vector<std::vector<std::vector<MortonIndex>>> nodeRepartition;
	createNodeRepartition(distributedMortonIndex, nodeRepartition, nproc, treeHeight);

	// For each level, compute the group sizes
	for(int idxLevel = treeHeight - 2; idxLevel >= 0; --idxLevel)
	{
		processId = 0;
		int countParticleInTheGroup = 0;
		MortonIndex previousMortonCell = -1;

		//cout << "Compute Level " << idxLevel << endl;
		for(FSize idxPart = 0; idxPart < nbParticles; ++idxPart)
		{
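			// Dropping 3 bits per level difference maps the leaf Morton index to its ancestor cell at idxLevel.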
			MortonIndex mortonCell = (particlesToSort[idxPart].mindex) >> (3*(treeHeight - 1 - idxLevel));
			if(mortonCell <= nodeRepartition[idxLevel][processId][1]) // If the index is inside the working interval
			{
				if(mortonCell != previousMortonCell) // If it is a new cell index
				{
					++countParticleInTheGroup; // Count it in the current group
					previousMortonCell = mortonCell;
					if(countParticleInTheGroup == groupSize) // If the group is full, record its size
					{
						sizeForEachGroup[idxLevel].push_back(groupSize);
						countParticleInTheGroup = 0;
					}
				}
			}
			else // We moved to the next process interval: record what has been counted so far
			{
				if(countParticleInTheGroup > 0)
					sizeForEachGroup[idxLevel].push_back(countParticleInTheGroup);
				countParticleInTheGroup = 1;
				previousMortonCell = mortonCell;
				++processId;
			}
		}
		if(countParticleInTheGroup > 0)
			sizeForEachGroup[idxLevel].push_back(countParticleInTheGroup);
	}
}
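// Propagate the per-process Morton intervals from the leaf level upward: each bound at a level is
// the parent (>> 3) of the bound below, and lower bounds are clipped so consecutive processes never overlap.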
void createNodeRepartition(std::vector<MortonIndex> distributedMortonIndex, std::vector<std::vector<std::vector<MortonIndex>>>& nodeRepartition, int nproc, int treeHeight) {
	nodeRepartition.resize(treeHeight, std::vector<std::vector<MortonIndex>>(nproc, std::vector<MortonIndex>(2)));
	for(int node_id = 0; node_id < nproc; ++node_id){
		nodeRepartition[treeHeight-1][node_id][0] = distributedMortonIndex[node_id*2];
		nodeRepartition[treeHeight-1][node_id][1] = distributedMortonIndex[node_id*2+1];
	}
	for(int idxLevel = treeHeight - 2; idxLevel >= 0  ; --idxLevel){
		nodeRepartition[idxLevel][0][0] = nodeRepartition[idxLevel+1][0][0] >> 3;
		nodeRepartition[idxLevel][0][1] = nodeRepartition[idxLevel+1][0][1] >> 3;
		for(int node_id = 1; node_id < nproc; ++node_id){
			nodeRepartition[idxLevel][node_id][0] = FMath::Max(nodeRepartition[idxLevel+1][node_id][0] >> 3, nodeRepartition[idxLevel][node_id-1][0]+1); //Berenger phd :)
			nodeRepartition[idxLevel][node_id][1] = nodeRepartition[idxLevel+1][node_id][1] >> 3;
		}
	}
}
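// Split total particles as evenly as possible over mpi_count ranks: the first total%mpi_count
// ranks get one extra particle (e.g. total=10, mpi_count=4 gives 3,3,2,2).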
FSize getNbParticlesPerNode(FSize mpi_count, FSize mpi_rank, FSize total){
	if(mpi_rank < (total%mpi_count))
		return ((total - (total%mpi_count))/mpi_count)+1;
	return ((total - (total%mpi_count))/mpi_count);
}