// testBlockedImplicitUniform.cpp -- blocked (group-tree) implicit MPI/StarPU FMM test
// using the uniform (Lagrange) interpolation kernel.
// @FUSE_BLAS
// @FUSE_FFT
// @FUSE_STARPU
// @FUSE_MPI
// Keep in private GIT
#include <iostream>
#include <fstream>
#include <vector>
#include <mpi.h>

12
13
//#define _VERBOSE_LEAF
#define LOAD_FILE
14

15
#include "Utils/FGlobal.hpp"
16

17
#include "GroupTree/Core/FGroupTree.hpp"                      // Celui de develop
18

19
20
21
#include "Components/FSimpleLeaf.hpp"                      // same as develop
#include "Components/FSymbolicData.hpp"                    // same as develop
#include "Containers/FVector.hpp"                          // same as develop
22
23


24
25
26
27
#include "Utils/FMath.hpp"
#include "Utils/FMemUtils.hpp"
#include "Utils/FParameters.hpp"
#include "Utils/FParameterNames.hpp"
28
29


30
31
//#include "Files/FRandomLoader.hpp"
#include "Files/FFmaGenericLoader.hpp"
32
33


34
35
36
#include "Kernels/Interpolation/FInterpMatrixKernel.hpp"      // same as develop
#include "Kernels/Uniform/FUnifCell.hpp"                      // same as develop
#include "Kernels/Uniform/FUnifKernel.hpp"                    // same as develop
#include "GroupTree/Core/FGroupTaskStarpuImplicitAlgorithm.hpp"    // Celui de develop
//
#include "GroupTree/StarPUUtils/FStarPUKernelCapacities.hpp"  // same as develop
#include "GroupTree/StarPUUtils/FStarPUCpuWrapper.hpp"        // same as develop
#include "GroupTree/Core/FGroupTools.hpp"                     // only check leaves and cells
//
#include "GroupTree/Core/FP2PGroupParticleContainer.hpp"       // same as develop
#include "Kernels/P2P/FP2PParticleContainer.hpp"               // same as develop
48
49


50
#include "Utils/FLeafBalance.hpp"
51

52
53
54
55
56
57
//
//
#include "Core/FFmmAlgorithm.hpp"


#include "GroupTree/Core/FGroupTools.hpp"
58

59
// Initialize the types
60
typedef double FReal;
61
62
static const int ORDER = 6;
typedef FInterpMatrixKernelR<FReal> MatrixKernelClass;
63

64
65
66
67
using GroupCellClass     = FUnifCell<FReal, ORDER>;
using GroupCellUpClass   = typename GroupCellClass::multipole_t;
using GroupCellDownClass = typename GroupCellClass::local_expansion_t;
using GroupCellSymbClass = FSymbolicData;
68
69


70
typedef FP2PGroupParticleContainer<FReal>          GroupContainerClass;
71
72
typedef FGroupTree< FReal, GroupCellSymbClass, GroupCellUpClass, GroupCellDownClass,
		    GroupContainerClass, 1, 4, FReal>  GroupOctreeClass;
73
typedef FStarPUAllCpuCapacities<FUnifKernel<FReal,GroupCellClass,GroupContainerClass,MatrixKernelClass,ORDER>> GroupKernelClass;
74
75
76
77
78
typedef FStarPUCpuWrapper<typename GroupOctreeClass::CellGroupClass, GroupCellClass,
			  GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass> GroupCpuWrapper;
typedef FGroupTaskStarPUImplicitAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass,
					  GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass,
					  GroupCpuWrapper > GroupAlgorithm;
79
80

#ifndef LOAD_FILE
81
typedef FRandomLoader<FReal>     LoaderClass;
82
#else
83
typedef FFmaGenericLoader<FReal> LoaderClass;
84
85
#endif

86
void sortParticle(FPoint<FReal> * allParticlesToSort, int treeHeight, int groupSize, std::vector<std::vector<int>> & sizeForEachGroup, std::vector<MortonIndex> & distributedMortonIndex, LoaderClass& loader, int nproc);
87
88
89
90
91
92
void createNodeRepartition(std::vector<MortonIndex> distributedMortonIndex, std::vector<std::vector<std::vector<MortonIndex>>>& nodeRepartition, int nproc, int treeHeight);
FSize getNbParticlesPerNode(FSize mpi_count, FSize mpi_rank, FSize total);

int main(int argc, char* argv[]){
    const FParameterNames LocalOptionBlocSize {
        {"-bs"},
93
94
            "The size of the block of the blocked tree"
                };
95
96
97
98
99
100
101
102
103
    const FParameterNames LocalOptionNoValidate { {"-no-validation"}, "To avoid comparing with direct computation"};
    FHelpDescribeAndExit(argc, argv, "Loutre",
                         FParameterDefinitions::OctreeHeight, FParameterDefinitions::NbParticles,
                         FParameterDefinitions::OctreeSubHeight, FParameterDefinitions::InputFile, LocalOptionBlocSize, LocalOptionNoValidate);

    // Get params
    const int NbLevels      = FParameters::getValue(argc,argv,FParameterDefinitions::OctreeHeight.options, 5);
    const int groupSize      = FParameters::getValue(argc,argv,LocalOptionBlocSize.options, 8);

104
    #ifndef STARPU_USE_MPI
105
    std::cout << "Pas de mpi -_-\" " << std::endl;
106
107
    #endif
    int mpi_rank, nproc;
108
    FMpi mpiComm(argc,argv);
109
110
    mpi_rank = mpiComm.global().processId();
    nproc = mpiComm.global().processCount();
111

112
    #ifndef LOAD_FILE
113
      std::cout << " RANDOM LOADER" << std::endl;
114
    const FSize NbParticles   = FParameters::getValue(argc,argv,FParameterDefinitions::NbParticles.options, FSize(10000));
115
    #else
116
117
    // Load the particles
    const char* const filename = FParameters::getStr(argc,argv,FParameterDefinitions::InputFile.options, "../Data/test20k.fma");
118
119
    std::cout << " FMA LOADER read from " << filename << std::endl;

120
121
    LoaderClass loader(filename);
    FAssertLF(loader.isOpen());
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
    const FSize NbParticles   = loader.getNumberOfParticles();
    #endif

    FPoint<FReal> * allParticlesToSort = new FPoint<FReal>[NbParticles];

    //Fill particles
    #ifndef LOAD_FILE
    {
        FSize idxPart = 0;
        for(int i = 0; i < mpiComm.global().processCount(); ++i){
            FSize NbParticlesPerNode = getNbParticlesPerNode(nproc, i, NbParticles);
            LoaderClass loader(NbParticlesPerNode, 1.0, FPoint<FReal>(0,0,0), i);
            FAssertLF(loader.isOpen());
            for(FSize j= 0 ; j < NbParticlesPerNode ; ++j){
                loader.fillParticle(&allParticlesToSort[idxPart]);//Same with file or not
                ++idxPart;
            }
        }
    }
    LoaderClass loader(NbParticles, 1.0, FPoint<FReal>(0,0,0));
    #else
143
    for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
144
        FReal physicalValue = 0.1;
145
146
        loader.fillParticle(&allParticlesToSort[idxPart], &physicalValue);//Same with file or not
    }
147
    #endif
148

149
    std::vector<MortonIndex> distributedMortonIndex;
150
    std::vector<std::vector<int>> sizeForEachGroup;
151
    sortParticle(allParticlesToSort, NbLevels, groupSize, sizeForEachGroup, distributedMortonIndex, loader, nproc);
152
153
154

    FP2PParticleContainer<FReal> allParticles;
    for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
155
        FReal physicalValue = 0.1;
156
        allParticles.push(allParticlesToSort[idxPart], physicalValue);
157
    }
158
    // Put the data into the tree
159
160
161
162
163
164
165
166
167
168
169
    std::cout << std::endl<< "  Group size at the leaf level " << std::endl ;
    int totalLeaves = 0 ;
    for ( std::size_t idLevel=2;  idLevel< sizeForEachGroup.size() ; ++idLevel){
        std::cout << "  Group size at level " << idLevel << std::endl ;
        totalLeaves = 0 ;
        for ( auto v : sizeForEachGroup[idLevel]){
            totalLeaves += v;
            std::cout << "   " << v ;
          }
        std::cout << std::endl ;std::cout << " Total number of leaves: " <<totalLeaves << std::endl;
      }
170
    GroupOctreeClass groupedTree(NbLevels, loader.getBoxWidth(), loader.getCenterOfBox(), groupSize, &allParticles, sizeForEachGroup, true);
171
//
172
    // Run the algorithm
173
174
175
176
    //
    int operationsToProceed =  FFmmP2M | FFmmM2M | FFmmM2L | FFmmL2L | FFmmL2P;
    
    //FFmmP2M | FFmmM2M   | FFmmM2L| FFmmL2L; //| FFmmL2P ;//FFmmP2P; //FFmmNearAndFarFields FFmmNearField| FFmmP2M | FFmmM2M | FFmmM2L | FFmmL2L | FFmmL2P;
177
178
179
    const MatrixKernelClass MatrixKernel;
    GroupKernelClass groupkernel(NbLevels, loader.getBoxWidth(), loader.getCenterOfBox(), &MatrixKernel);
    GroupAlgorithm groupalgo(&groupedTree,&groupkernel, distributedMortonIndex);
180
181
    mpiComm.global().barrier();
    FTic timerExecute;
182
    timerExecute.tic();
183
184
    starpu_fxt_start_profiling();
    groupalgo.execute(operationsToProceed);
185
    groupedTree.printInfoBlocks();
186
187
188
    mpiComm.global().barrier();
    starpu_fxt_stop_profiling();
    timerExecute.tac();
189
190
191
192
193
194
    auto timeElapsed = timerExecute.elapsed();
    double minTime,maxTime,meanTime ;
    groupTree::timeAverage(mpiComm, timeElapsed, minTime, maxTime, meanTime) ;
    std::cout <<  " time (in sec.)  on node: " << timeElapsed
               << " min " << minTime << " max " << maxTime
               << " mean " << meanTime << std::endl;
195

196
197
    // Validate the result
    if(FParameters::existParameter(argc, argv, LocalOptionNoValidate.options) == false){
198
199
        //
        typedef FP2PParticleContainer<FReal>         ContainerClass;
200
201
202
203
204
205
        typedef FSimpleLeaf<FReal, ContainerClass >  LeafClass;
        typedef FUnifCell<FReal,ORDER> CellClass;
        typedef FOctree<FReal, CellClass,ContainerClass,LeafClass> OctreeClass;
        typedef FUnifKernel<FReal,CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass;
        typedef FFmmAlgorithm<OctreeClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass;

206
        const FReal epsilon = 1E-10;
207
208
209
210
211
212
213
214
215
216
217
218

        OctreeClass treeCheck(NbLevels, 2,loader.getBoxWidth(),loader.getCenterOfBox());

        for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
            // put in tree
            treeCheck.insert(allParticlesToSort[idxPart], 0.1);
        }

        MatrixKernelClass MatrixKernelValidation;
        KernelClass kernels(NbLevels, loader.getBoxWidth(), loader.getCenterOfBox(), &MatrixKernelValidation);
        FmmClass algorithm(&treeCheck, &kernels);
        algorithm.execute(operationsToProceed);
219
220
221
        std::string fileName("output-Block-") ;
        fileName +=  std::to_string(nproc) + "-" + std::to_string(mpi_rank) + ".fma" ;
        groupTree::saveSolutionInFile(fileName, loader.getNumberOfParticles(), treeCheck) ;
222

223
224
        groupTree::checkCellTree(groupedTree, groupalgo, treeCheck, epsilon) ;
        groupTree::checkLeaves(groupedTree, groupalgo, treeCheck, epsilon) ;
225

226
        std::cout << "Comparing is over" << std::endl;
227
    }
228

Berenger Bramas's avatar
Berenger Bramas committed
229
    std::cout << "Done" << std::endl;
230
231
    return 0;
}
/// Sorts `allParticles` in place by Morton index, then fills:
///  - sizeForEachGroup[level] : size of every block/group at each tree level,
///  - distributedMortonIndex  : flat list of [first,last] Morton-index pairs,
///    one pair per process, describing the leaf-level ownership intervals.
/// The leaf distribution is balanced over the number of non-empty leaves
/// using FLeafBalance.
void sortParticle(FPoint<FReal> * allParticles, int treeHeight, int groupSize, std::vector<std::vector<int>> & sizeForEachGroup, std::vector<MortonIndex> & distributedMortonIndex, LoaderClass& loader, int nproc)
{
    // Structure used for sorting: particle position paired with its Morton index.
    struct ParticleSortingStruct{
        FPoint<FReal> position;
        MortonIndex mindex;
    };
    // Build the array of structures to sort.
    const FSize nbParticles = loader.getNumberOfParticles();
    FAssertLF(nbParticles > 0); // the interval code below reads particlesToSort[nbParticles-1]
    ParticleSortingStruct* particlesToSort = new ParticleSortingStruct[nbParticles];
    for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart){
        const FTreeCoordinate host = FCoordinateComputer::GetCoordinateFromPosition<FReal>(loader.getCenterOfBox(), loader.getBoxWidth(),
                                                                                           treeHeight,
                                                                                           allParticles[idxPart]);
        const MortonIndex particleIndex = host.getMortonIndex();
        particlesToSort[idxPart].mindex = particleIndex;
        particlesToSort[idxPart].position = allParticles[idxPart];
    }

    // Sort by Morton index (parallel quicksort).
    FQuickSort<ParticleSortingStruct, FSize>::QsOmp(particlesToSort, nbParticles, [](const ParticleSortingStruct& v1, const ParticleSortingStruct& v2){
            return v1.mindex <= v2.mindex;
        });
    // Copy the positions back into the caller's array, now in Morton order.
    for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart){
        allParticles[idxPart] = particlesToSort[idxPart].position;
    }

    // Count the number of non-empty leaves.
    sizeForEachGroup.resize(treeHeight);
    MortonIndex previousLeaf = -1;
    int numberOfLeaf = 0;
    for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart)
    {
        if(particlesToSort[idxPart].mindex != previousLeaf)
        {
            previousLeaf = particlesToSort[idxPart].mindex;
            ++numberOfLeaf;
        }
    }

    // Compute the group sizes at the leaf level: each process gets full groups
    // of `groupSize` leaves plus at most one smaller trailing group.
    FLeafBalance balancer;
    for(int processId = 0; processId < nproc; ++processId)
    {
        FSize size_last;
        FSize countGroup;
        FSize leafOnProcess = balancer.getRight(numberOfLeaf, nproc, processId) - balancer.getLeft(numberOfLeaf, nproc, processId);
        size_last = leafOnProcess%groupSize;
        countGroup = (leafOnProcess - size_last)/groupSize;
        for(int i = 0; i < countGroup; ++i)
            sizeForEachGroup[treeHeight-1].push_back(groupSize);
        if(size_last > 0)
            sizeForEachGroup[treeHeight-1].push_back((int)size_last);
    }

    // Compute the working interval (Morton bounds) of each process at leaf level.
    previousLeaf = -1;
    int countLeaf = 0;
    int processId = 0;
    FSize leafOnProcess = balancer.getRight(numberOfLeaf, nproc, 0) - balancer.getLeft(numberOfLeaf, nproc, 0);
    distributedMortonIndex.push_back(previousLeaf);
    for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart)
    {
        if(particlesToSort[idxPart].mindex != previousLeaf)
        {
            previousLeaf = particlesToSort[idxPart].mindex;
            ++countLeaf;
            if(countLeaf == leafOnProcess)
            {
                // Close the current process interval and open the next one.
                distributedMortonIndex.push_back(previousLeaf);
                distributedMortonIndex.push_back(previousLeaf);
                countLeaf = 0;
                ++processId;
                leafOnProcess = balancer.getRight(numberOfLeaf, nproc, processId) - balancer.getLeft(numberOfLeaf, nproc, processId);
            }
        }
    }
    distributedMortonIndex.push_back(particlesToSort[nbParticles - 1].mindex);

    std::cout << "Morton distribution to build the duplicated tree " <<std::endl;
    for (auto v : distributedMortonIndex)
      std::cout << "  " << v ;
    std::cout << std::endl;

    // Compute the working intervals at every level.
    std::vector<std::vector<std::vector<MortonIndex>>> nodeRepartition;
    createNodeRepartition(distributedMortonIndex, nodeRepartition, nproc, treeHeight);

    for ( std::size_t idLevel=0;  idLevel< nodeRepartition.size() ; ++idLevel){
        std::cout << "  nodeRepartition at level " << idLevel << std::endl ;
        for ( std::size_t procID=0 ;  procID<  nodeRepartition[idLevel].size();  ++procID){
            std::cout << "  n  proc( " << procID << "  ) " <<
                         " [ " << nodeRepartition[idLevel][procID][0] << ", "
                      << nodeRepartition[idLevel][procID][1] <<" ]" <<std::endl ;
          }
      }

    // For each upper level, compute the group sizes.
    for(int idxLevel = treeHeight - 2; idxLevel >= 0; --idxLevel)
    {
        processId = 0;
        int countParticleInTheGroup = 0;
        MortonIndex previousMortonCell = -1;

        // Fix: loop index was `int` while nbParticles is FSize.
        for(FSize idxPart = 0; idxPart < nbParticles; ++idxPart)
        {
            MortonIndex mortonCell = (particlesToSort[idxPart].mindex) >> (3*(treeHeight - 1 - idxLevel));
            if(mortonCell <= nodeRepartition[idxLevel][processId][1]) // index inside the working interval
            {
                if(mortonCell != previousMortonCell) // a new cell index
                {
                    ++countParticleInTheGroup; // count it in the current group
                    previousMortonCell = mortonCell;
                    if(countParticleInTheGroup == groupSize) // group full: record it
                    {
                        sizeForEachGroup[idxLevel].push_back(groupSize);
                        countParticleInTheGroup = 0;
                    }
                }
            }
            else // switching to the next process interval: flush what was counted
            {
                if(countParticleInTheGroup > 0)
                    sizeForEachGroup[idxLevel].push_back(countParticleInTheGroup);
                countParticleInTheGroup = 1;
                previousMortonCell = mortonCell;
                ++processId;
            }
        }
        if(countParticleInTheGroup > 0)
            sizeForEachGroup[idxLevel].push_back(countParticleInTheGroup);
    }

    // Fix: release the sorting buffer (was leaked).
    delete[] particlesToSort;

    // Print group size per level
  std::cout << std::endl<< "  Group size at the leaf level " << std::endl ;
  int totalLeaves = 0 ;
  for ( std::size_t idLevel=2;  idLevel< sizeForEachGroup.size() ; ++idLevel){
      std::cout << "  Group size at level " << idLevel << std::endl ;
      totalLeaves = 0 ;
      for ( auto v : sizeForEachGroup[idLevel]){
          totalLeaves += v;
          std::cout << "   " << v ;
        }
      std::cout << std::endl ;std::cout << " Total number of leaves: " <<totalLeaves << std::endl;
    }
}
void createNodeRepartition(std::vector<MortonIndex> distributedMortonIndex, std::vector<std::vector<std::vector<MortonIndex>>>& nodeRepartition, int nproc, int treeHeight) {
377
378
379
380
381
382
383
384
385
386
387
388
389
    nodeRepartition.resize(treeHeight, std::vector<std::vector<MortonIndex>>(nproc, std::vector<MortonIndex>(2)));
    for(int node_id = 0; node_id < nproc; ++node_id){
        nodeRepartition[treeHeight-1][node_id][0] = distributedMortonIndex[node_id*2];
        nodeRepartition[treeHeight-1][node_id][1] = distributedMortonIndex[node_id*2+1];
    }
    for(int idxLevel = treeHeight - 2; idxLevel >= 0  ; --idxLevel){
        nodeRepartition[idxLevel][0][0] = nodeRepartition[idxLevel+1][0][0] >> 3;
        nodeRepartition[idxLevel][0][1] = nodeRepartition[idxLevel+1][0][1] >> 3;
        for(int node_id = 1; node_id < nproc; ++node_id){
            nodeRepartition[idxLevel][node_id][0] = FMath::Max(nodeRepartition[idxLevel+1][node_id][0] >> 3, nodeRepartition[idxLevel][node_id-1][0]+1); //Berenger phd :)
            nodeRepartition[idxLevel][node_id][1] = nodeRepartition[idxLevel+1][node_id][1] >> 3;
        }
    }
390
391
392
}

/// Number of particles handled by rank `mpi_rank` when `total` particles are
/// distributed as evenly as possible over `mpi_count` ranks: every rank gets
/// total/mpi_count, and the first total%mpi_count ranks get one extra.
FSize getNbParticlesPerNode(FSize mpi_count, FSize mpi_rank, FSize total){
    const FSize base      = total / mpi_count;  // == (total - total%mpi_count)/mpi_count
    const FSize remainder = total % mpi_count;
    return (mpi_rank < remainder) ? base + 1 : base;
}