testBlockedImplicitAlgorithm.cpp 19.3 KB
Newer Older
1
2
3
// @FUSE_STARPU
// @FUSE_MPI
//
4
5
// Keep in private GIT
#include <iostream>
6
7
#include <fstream>
#include <vector>
8
#include <mpi.h>
9
10
11
12
using namespace std;

#include "../../Src/Utils/FGlobal.hpp"

13
#include "../../Src/GroupTree/Core/FGroupTree.hpp"
14
15

#include "../../Src/Components/FSimpleLeaf.hpp"
16
#include "../../Src/Components/FSymbolicData.hpp"
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
#include "../../Src/Containers/FVector.hpp"


#include "../../Src/Utils/FMath.hpp"
#include "../../Src/Utils/FMemUtils.hpp"
#include "../../Src/Utils/FParameters.hpp"

#include "../../Src/Files/FRandomLoader.hpp"

#include "../../Src/GroupTree/Core/FGroupSeqAlgorithm.hpp"
#include "../../Src/GroupTree/Core/FGroupTaskStarpuImplicitAlgorithm.hpp"
#include "../../Src/GroupTree/StarPUUtils/FStarPUKernelCapacities.hpp"

#include "../../Src/GroupTree/StarPUUtils/FStarPUCpuWrapper.hpp"
#include "../../Src/GroupTree/Core/FP2PGroupParticleContainer.hpp"
#include "../../Src/GroupTree/Core/FGroupTaskAlgorithm.hpp"

Berenger Bramas's avatar
Berenger Bramas committed
34
#include "../../Src/Utils/FLeafBalance.hpp"
35

36
37
38
39
40
41
42
43
44
45
#include "../../Src/Utils/FParameterNames.hpp"

#include "../../Src/Components/FTestParticleContainer.hpp"
#include "../../Src/Components/FTestCell.hpp"
#include "../../Src/Components/FTestKernels.hpp"
#include "../../Src/GroupTree/TestKernel/FGroupTestParticleContainer.hpp"

#include "../../Src/Files/FFmaGenericLoader.hpp"
#include "../../Src/Core/FFmmAlgorithm.hpp"

46
typedef double FReal;
47

48
49
50
51
52
// Initialize the types
using GroupCellClass     = FTestCell;
using GroupCellUpClass   = typename GroupCellClass::multipole_t;
using GroupCellDownClass = typename GroupCellClass::local_expansion_t;
using GroupCellSymbClass = FSymbolicData;
53
54


55
56
57
58
59
60
typedef FGroupTestParticleContainer<FReal>                                GroupContainerClass;
typedef FGroupTree< FReal, GroupCellSymbClass, GroupCellUpClass, GroupCellDownClass,
                    GroupContainerClass, 0, 1, long long int>  GroupOctreeClass;
typedef FStarPUAllCpuCapacities<FTestKernels< GroupCellClass, GroupContainerClass >>  GroupKernelClass;
typedef FStarPUCpuWrapper<typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass> GroupCpuWrapper;
typedef FGroupTaskStarPUImplicitAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupCpuWrapper > GroupAlgorithm;
61

62
63
64
65
66
typedef FTestCell                   CellClass;
typedef FTestParticleContainer<FReal>      ContainerClass;
typedef FSimpleLeaf<FReal, ContainerClass >                     LeafClass;
typedef FOctree<FReal, CellClass, ContainerClass , LeafClass >  OctreeClass;
typedef FTestKernels< CellClass, ContainerClass >         KernelClass;
67

68
69
// FFmmAlgorithmTask FFmmAlgorithmThread
typedef FFmmAlgorithm<OctreeClass, CellClass, ContainerClass, KernelClass, LeafClass >     FmmClass;
70
//#define LOAD_FILE
71
#ifndef LOAD_FILE
72
typedef FRandomLoader<FReal> LoaderClass;
73
#else
74
typedef FFmaGenericLoader<FReal> LoaderClass;
75
76
#endif
void timeAverage(int mpi_rank, int nproc, double elapsedTime);
77
void sortParticle(FPoint<FReal> * allParticlesToSort, int treeHeight, int groupSize, vector<vector<int>> & sizeForEachGroup, vector<MortonIndex> & distributedMortonIndex, LoaderClass& loader, int nproc);
78
void createNodeRepartition(std::vector<MortonIndex> distributedMortonIndex, std::vector<std::vector<std::vector<MortonIndex>>>& nodeRepartition, int nproc, int treeHeight);
79
FSize getNbParticlesPerNode(FSize mpi_count, FSize mpi_rank, FSize total);
80
81
82
83

int main(int argc, char* argv[]){
    const FParameterNames LocalOptionBlocSize {
        {"-bs"},
84
85
            "The size of the block of the blocked tree"
                };
86
87
    FHelpDescribeAndExit(argc, argv, "Test the blocked tree by counting the particles.",
                         FParameterDefinitions::OctreeHeight, FParameterDefinitions::NbParticles,
88
                         FParameterDefinitions::OctreeSubHeight, FParameterDefinitions::InputFile, LocalOptionBlocSize);
89
90
91

    // Get params
    const int NbLevels      = FParameters::getValue(argc,argv,FParameterDefinitions::OctreeHeight.options, 5);
92
93
    const int groupSize      = FParameters::getValue(argc,argv,LocalOptionBlocSize.options, 8);

94
95
96
97
    #ifndef STARPU_USE_MPI
    cout << "Pas de mpi -_-\" " << endl;
    #endif
    int mpi_rank, nproc;
98
    FMpi mpiComm(argc,argv);
99
100
    mpi_rank = mpiComm.global().processId();
    nproc = mpiComm.global().processCount();
101
102


103
    #ifndef LOAD_FILE
104
    const FSize NbParticles   = FParameters::getValue(argc,argv,FParameterDefinitions::NbParticles.options, FSize(10000));
105
    #else
106
107
    // Load the particles
    const char* const filename = FParameters::getStr(argc,argv,FParameterDefinitions::InputFile.options, "../Data/test20k.fma");
108
    LoaderClass loader(filename);
109
    FAssertLF(loader.isOpen());
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
    const FSize NbParticles   = loader.getNumberOfParticles();
    #endif

    FPoint<FReal> * allParticlesToSort = new FPoint<FReal>[NbParticles];

    //Fill particles
    #ifndef LOAD_FILE
    {
        FSize idxPart = 0;
        for(int i = 0; i < mpiComm.global().processCount(); ++i){
            FSize NbParticlesPerNode = getNbParticlesPerNode(nproc, i, NbParticles);
            LoaderClass loader(NbParticlesPerNode, 1.0, FPoint<FReal>(0,0,0), i);
            FAssertLF(loader.isOpen());
            for(FSize j= 0 ; j < NbParticlesPerNode ; ++j){
                loader.fillParticle(&allParticlesToSort[idxPart]);//Same with file or not
                ++idxPart;
            }
        }
    }
    LoaderClass loader(NbParticles, 1.0, FPoint<FReal>(0,0,0));
    #else
131
    for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
132
        loader.fillParticle(&allParticlesToSort[idxPart]);//Same with file or not
133
    }
134
    #endif
135

136
137
138
    // Usual octree
    OctreeClass tree(NbLevels, FParameters::getValue(argc,argv,FParameterDefinitions::OctreeSubHeight.options, 2),
                     loader.getBoxWidth(), loader.getCenterOfBox());
139
140
    std::vector<MortonIndex> distributedMortonIndex;
    vector<vector<int>> sizeForEachGroup;
141
    FTestParticleContainer<FReal> allParticles;
142
    sortParticle(allParticlesToSort, NbLevels, groupSize, sizeForEachGroup, distributedMortonIndex, loader, nproc);
143
144
145
    for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
        allParticles.push(allParticlesToSort[idxPart]);
        tree.insert(allParticlesToSort[idxPart]);
146
147
148
    }
    delete allParticlesToSort;
    allParticlesToSort = nullptr;
149
    // Put the data into the tree
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164

    //GroupOctreeClass groupedTree(NbLevels, loader.getBoxWidth(), loader.getCenterOfBox(), groupSize, &allParticles, false);
    GroupOctreeClass groupedTree(NbLevels, loader.getBoxWidth(), loader.getCenterOfBox(), groupSize, &allParticles, sizeForEachGroup, false);

    //Check tree structure at leaf level
    groupedTree.forEachCellLeaf<GroupContainerClass>(

        [&](GroupCellSymbClass* gsymb ,
            GroupCellUpClass*   /* gmul */,
            GroupCellDownClass* /* gloc */,
            GroupContainerClass* gleaf)
        {
            const ContainerClass* src = tree.getLeafSrc(gsymb->getMortonIndex());
            if(src == nullptr){
                std::cout << "[PartEmpty] Error cell should not exist " << gsymb->getMortonIndex() << "\n";
165
            }
166
167
168
169
170
171
172
            else {
                if(src->getNbParticles() != gleaf->getNbParticles()){
                    std::cout << "[Part] Nb particles is different at index " << gsymb->getMortonIndex() << " is " << gleaf->getNbParticles() << " should be " << src->getNbParticles() << "\n";
                }
            }
        });

173
174
    // Run the algorithm
    GroupKernelClass groupkernel;
175
    GroupAlgorithm groupalgo(&groupedTree,&groupkernel, distributedMortonIndex);
176
177
178
179
180
181
182
    mpiComm.global().barrier();
    FTic timerExecute;
    groupalgo.execute();
    mpiComm.global().barrier();
    double elapsedTime = timerExecute.tacAndElapsed();
    timeAverage(mpi_rank, nproc, elapsedTime);

183
184
185
186
    // Usual algorithm
    KernelClass kernels;            // FTestKernels FBasicKernels
    FmmClass algo(&tree,&kernels);  //FFmmAlgorithm FFmmAlgorithmThread
    algo.execute();
187
188
189
190
191
    int rank = groupalgo.getRank();
    for(int i = 2; i < groupedTree.getHeight(); ++i)//No task at level 0 and 1
        if(groupedTree.getNbCellGroupAtLevel(i) < groupalgo.getNProc() && rank == 0)
            std::cout << "Error at level " << i << std::endl;

192
    // Validate the result
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
    for(int idxLevel = 2 ; idxLevel < groupedTree.getHeight() ; ++idxLevel){
        for(int idxGroup = 0 ; idxGroup < groupedTree.getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
            if(groupalgo.isDataOwnedBerenger(groupedTree.getCellGroup(idxLevel, idxGroup)->getStartingIndex(), idxLevel)){
                GroupOctreeClass::CellGroupClass* currentCells = groupedTree.getCellGroup(idxLevel, idxGroup);
                currentCells->forEachCell(
                    [&](GroupCellSymbClass* gsymb,
                        GroupCellUpClass*   gmul,
                        GroupCellDownClass* gloc)
                    {
                        const CellClass* cell = tree.getCell(gsymb->getMortonIndex(), idxLevel);
                        if(cell == nullptr) {
                            std::cout << "[Empty node(" << rank << ")] Error cell should not exist " << gsymb->getMortonIndex() << "\n";
                        } else {
                            if(gmul->get() != cell->getMultipoleData().get()) {
                                std::cout << "[Up node(" << rank << ")] Up is different at index " << gsymb->getMortonIndex() << " level " << idxLevel << " is " << gmul->get() << " should be " << cell->getMultipoleData().get() << "\n";
                            }
                            if(gloc->get() != cell->getLocalExpansionData().get()){
                                std::cout << "[Down node(" << rank << ")] Down is different at index " << gsymb->getMortonIndex() << " level " << idxLevel << " is " << gloc->get() << " should be " << cell->getMultipoleData().get() << "\n";
                            }
                        }
                    });
            }
        }
    }
    {
        int idxLevel = groupedTree.getHeight()-1;
        for(int idxGroup = 0 ; idxGroup < groupedTree.getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
            if(groupalgo.isDataOwnedBerenger(groupedTree.getCellGroup(groupedTree.getHeight()-1, idxGroup)->getStartingIndex(), groupedTree.getHeight()-1)){
                GroupOctreeClass::ParticleGroupClass* particleGroup = groupedTree.getParticleGroup(idxGroup);
                GroupOctreeClass::CellGroupClass* cellGroup = groupedTree.getCellGroup(idxLevel, idxGroup);
                cellGroup->forEachCell(
                    [&](GroupCellSymbClass* gsymb,
                        GroupCellUpClass*   gmul,
                        GroupCellDownClass* gloc)
                    {
                        MortonIndex midx = gsymb->getMortonIndex();
                        const int leafIdx = particleGroup->getLeafIndex(midx);
                        GroupContainerClass leaf = particleGroup->template getLeaf<GroupContainerClass>(leafIdx);
                        const FSize nbPartsInLeaf = leaf.getNbParticles();
                        if(gmul->get() != nbPartsInLeaf){
                            std::cout << "[P2M node(" << rank << ")] Error a Cell has " << gmul->get() << " (it should be " << nbPartsInLeaf << ")\n";
                        }
                        const long long int* dataDown = leaf.getDataDown();
                        for(FSize idxPart = 0 ; idxPart < nbPartsInLeaf ; ++idxPart){
                            if(dataDown[idxPart] != loader.getNumberOfParticles()-1){
                                std::cout << "[Full node(" << rank << ")] Error a particle has " << dataDown[idxPart] << " (it should be " << (loader.getNumberOfParticles()-1) << ") at index " << gsymb->getMortonIndex() << "\n";
                            }
                        }
                    });
            }
        }
    }
245
246
    return 0;
}
247
248
void timeAverage(int mpi_rank, int nproc, double elapsedTime)
{
249
    if(mpi_rank == 0)
250
	{
251
252
            double sumElapsedTime = elapsedTime;
            for(int i = 1; i < nproc; ++i)
253
		{
254
255
256
257
                    double tmp;
                    MPI_Recv(&tmp, 1, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, 0);
                    if(tmp > sumElapsedTime)
                        sumElapsedTime = tmp;
258
		}
259
            std::cout << "Average time per node (implicit Cheby) : " << sumElapsedTime << "s" << std::endl;
260
	}
261
    else
262
	{
263
            MPI_Send(&elapsedTime, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
264
	}
265
    MPI_Barrier(MPI_COMM_WORLD);
266
}
267
void sortParticle(FPoint<FReal> * allParticles, int treeHeight, int groupSize, vector<vector<int>> & sizeForEachGroup, vector<MortonIndex> & distributedMortonIndex, LoaderClass& loader, int nproc)
268
{
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
    //Structure pour trier
    struct ParticleSortingStruct{
        FPoint<FReal> position;
        MortonIndex mindex;
    };
    // Création d'un tableau de la structure pour trier puis remplissage du tableau
    const FSize nbParticles = loader.getNumberOfParticles();
    ParticleSortingStruct* particlesToSort = new ParticleSortingStruct[nbParticles];
    for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart){
        const FTreeCoordinate host = FCoordinateComputer::GetCoordinateFromPosition<FReal>(loader.getCenterOfBox(), loader.getBoxWidth(),
                                                                                           treeHeight,
                                                                                           allParticles[idxPart]);
        const MortonIndex particleIndex = host.getMortonIndex();
        particlesToSort[idxPart].mindex = particleIndex;
        particlesToSort[idxPart].position = allParticles[idxPart];
    }
285

286
287
288
289
290
291
292
293
294
295
296
297
298
299
    //Trie du nouveau tableau
    FQuickSort<ParticleSortingStruct, FSize>::QsOmp(particlesToSort, nbParticles, [](const ParticleSortingStruct& v1, const ParticleSortingStruct& v2){
            return v1.mindex <= v2.mindex;
        });
    //Replace tout dans l'ordre dans le tableau d'origine
    for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart){
        allParticles[idxPart] = particlesToSort[idxPart].position;
    }

    //Compte le nombre de feuilles
    sizeForEachGroup.resize(treeHeight);
    MortonIndex previousLeaf = -1;
    int numberOfLeaf = 0;
    for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart)
300
	{
301
            if(particlesToSort[idxPart].mindex != previousLeaf)
302
		{
303
304
                    previousLeaf = particlesToSort[idxPart].mindex;
                    ++numberOfLeaf;
305
306
307
		}
	}

308
    //Calcul de la taille des groupes au niveau des feuilles
309
    FLeafBalance balancer;
310
    for(int processId = 0; processId < nproc; ++processId)
311
	{
312
313
314
315
316
317
318
319
320
            FSize size_last;
            FSize countGroup;
            FSize leafOnProcess = balancer.getRight(numberOfLeaf, nproc, processId) - balancer.getLeft(numberOfLeaf, nproc, processId);
            size_last = leafOnProcess%groupSize;
            countGroup = (leafOnProcess - size_last)/groupSize;
            for(int i = 0; i < countGroup; ++i)
                sizeForEachGroup[treeHeight-1].push_back(groupSize);
            if(size_last > 0)
                sizeForEachGroup[treeHeight-1].push_back((int)size_last);
321
	}
322
323
324
325
326
327
328
329

    //Calcul du working interval au niveau des feuilles
    previousLeaf = -1;
    int countLeaf = 0;
    int processId = 0;
    FSize leafOnProcess = balancer.getRight(numberOfLeaf, nproc, 0) - balancer.getLeft(numberOfLeaf, nproc, 0);
    distributedMortonIndex.push_back(previousLeaf);
    for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart)
330
	{
331
            if(particlesToSort[idxPart].mindex != previousLeaf)
332
		{
333
334
335
                    previousLeaf = particlesToSort[idxPart].mindex;
                    ++countLeaf;
                    if(countLeaf == leafOnProcess)
336
			{
337
338
339
340
341
                            distributedMortonIndex.push_back(previousLeaf);
                            distributedMortonIndex.push_back(previousLeaf);
                            countLeaf = 0;
                            ++processId;
                            leafOnProcess = balancer.getRight(numberOfLeaf, nproc, processId) - balancer.getLeft(numberOfLeaf, nproc, processId);
342
343
344
			}
		}
	}
345
    distributedMortonIndex.push_back(particlesToSort[nbParticles - 1].mindex);
346

347
348
349
    //Calcul des working interval à chaque niveau
    std::vector<std::vector<std::vector<MortonIndex>>> nodeRepartition;
    createNodeRepartition(distributedMortonIndex, nodeRepartition, nproc, treeHeight);
350

351
352
    //Pour chaque niveau calcul de la taille des groupe
    for(int idxLevel = treeHeight - 2; idxLevel >= 0; --idxLevel)
353
	{
354
355
356
            processId = 0;
            int countParticleInTheGroup = 0;
            MortonIndex previousMortonCell = -1;
357

358
359
            //cout << "Compute Level " << idxLevel << endl;
            for(int idxPart = 0; idxPart < nbParticles; ++idxPart)
360
		{
361
362
                    MortonIndex mortonCell = (particlesToSort[idxPart].mindex) >> (3*(treeHeight - 1 - idxLevel));
                    if(mortonCell <= nodeRepartition[idxLevel][processId][1]) //Si l'indice est dans le working interval
363
			{
364
                            if(mortonCell != previousMortonCell) //Si c'est un nouvelle indice
365
				{
366
367
368
                                    ++countParticleInTheGroup; //On le compte dans le groupe
                                    previousMortonCell = mortonCell;
                                    if(countParticleInTheGroup == groupSize) //Si le groupe est plein on ajoute le compte
369
					{
370
371
                                            sizeForEachGroup[idxLevel].push_back(groupSize);
                                            countParticleInTheGroup = 0;
372
373
374
					}
				}
			}
375
                    else //Si l'on change d'interval de process on ajoute ce que l'on a compté
376
			{
377
378
379
380
381
                            if(countParticleInTheGroup > 0)
                                sizeForEachGroup[idxLevel].push_back(countParticleInTheGroup);
                            countParticleInTheGroup = 1;
                            previousMortonCell = mortonCell;
                            ++processId;
382
383
			}
		}
384
385
            if(countParticleInTheGroup > 0)
                sizeForEachGroup[idxLevel].push_back(countParticleInTheGroup);
386
387
388
	}
}
void createNodeRepartition(std::vector<MortonIndex> distributedMortonIndex, std::vector<std::vector<std::vector<MortonIndex>>>& nodeRepartition, int nproc, int treeHeight) {
389
390
391
392
393
394
395
396
397
398
399
400
401
    nodeRepartition.resize(treeHeight, std::vector<std::vector<MortonIndex>>(nproc, std::vector<MortonIndex>(2)));
    for(int node_id = 0; node_id < nproc; ++node_id){
        nodeRepartition[treeHeight-1][node_id][0] = distributedMortonIndex[node_id*2];
        nodeRepartition[treeHeight-1][node_id][1] = distributedMortonIndex[node_id*2+1];
    }
    for(int idxLevel = treeHeight - 2; idxLevel >= 0  ; --idxLevel){
        nodeRepartition[idxLevel][0][0] = nodeRepartition[idxLevel+1][0][0] >> 3;
        nodeRepartition[idxLevel][0][1] = nodeRepartition[idxLevel+1][0][1] >> 3;
        for(int node_id = 1; node_id < nproc; ++node_id){
            nodeRepartition[idxLevel][node_id][0] = FMath::Max(nodeRepartition[idxLevel+1][node_id][0] >> 3, nodeRepartition[idxLevel][node_id-1][0]+1); //Berenger phd :)
            nodeRepartition[idxLevel][node_id][1] = nodeRepartition[idxLevel+1][node_id][1] >> 3;
        }
    }
402
}
403
FSize getNbParticlesPerNode(FSize mpi_count, FSize mpi_rank, FSize total){
404
405
406
    if(mpi_rank < (total%mpi_count))
        return ((total - (total%mpi_count))/mpi_count)+1;
    return ((total - (total%mpi_count))/mpi_count);
407
}