testBlockedMpiAlgorithm.cpp 10.4 KB
Newer Older
1 2

// Keep in private GIT
3 4
// @FUSE_MPI
// @FUSE_STARPU
5 6 7 8

#include "../../Src/Utils/FGlobal.hpp"
#include "../../Src/Utils/FMpi.hpp"

9
#include "../../Src/GroupTree/Core/FGroupTree.hpp"
10 11 12 13 14 15 16 17 18 19

#include "../../Src/Components/FSimpleLeaf.hpp"
#include "../../Src/Containers/FVector.hpp"


#include "../../Src/Utils/FMath.hpp"
#include "../../Src/Utils/FMemUtils.hpp"
#include "../../Src/Utils/FParameters.hpp"

#include "../../Src/Files/FRandomLoader.hpp"
20
#include "../../Src/Files/FFmaGenericLoader.hpp"
21

22
#include "../../Src/GroupTree/Core/FGroupTaskStarpuMpiAlgorithm.hpp"
23

24 25
#include "../../Src/GroupTree/Core/FP2PGroupParticleContainer.hpp"
#include "../../Src/GroupTree/Core/FGroupTaskAlgorithm.hpp"
26 27 28 29 30

#include "../../Src/Utils/FParameterNames.hpp"

#include "../../Src/Components/FTestParticleContainer.hpp"
#include "../../Src/Components/FTestKernels.hpp"
31
#include "../../Src/Components/FTestCell.hpp"
32
#include "../../Src/GroupTree/TestKernel/FGroupTestParticleContainer.hpp"
33

34 35
#include "../../Src/GroupTree/TestKernel/FTestCellPOD.hpp"

36
#include "../../Src/Utils/FLeafBalance.hpp"
37 38
#include "../../Src/Files/FMpiTreeBuilder.hpp"

39
#include "../../Src/Core/FFmmAlgorithm.hpp"
40
#include "../../Src/Containers/FCoordinateComputer.hpp"
41

42
#include "../../Src/GroupTree/StarPUUtils/FStarPUKernelCapacities.hpp"
43
#include "../../Src/GroupTree/StarPUUtils/FStarPUCpuWrapper.hpp"
44

45 46 47
#include <vector>
#include <iostream>
#include <fstream>
48

49
void timeAverage(int mpi_rank, int nproc, double elapsedTime);
50
FSize getNbParticlesPerNode(FSize mpi_count, FSize mpi_rank, FSize total);
51 52 53 54 55 56 57

int main(int argc, char* argv[]){
    const FParameterNames LocalOptionBlocSize {
        {"-bs"},
        "The size of the block of the blocked tree"
    };
    FHelpDescribeAndExit(argc, argv, "Test the blocked tree by counting the particles.",
58 59
                         FParameterDefinitions::OctreeHeight,
                         FParameterDefinitions::NbParticles,
60
                         LocalOptionBlocSize);
61
    typedef double FReal;
62
    // Initialize the types
63 64 65 66 67
    typedef FTestCellPODCore  GroupCellSymbClass;
    typedef FTestCellPODData  GroupCellUpClass;
    typedef FTestCellPODData  GroupCellDownClass;
    typedef FTestCellPOD      GroupCellClass;

68 69
    typedef FGroupTestParticleContainer<FReal>                                     GroupContainerClass;
    typedef FGroupTree< FReal, GroupCellClass, GroupCellSymbClass, GroupCellUpClass, GroupCellDownClass,
70
            GroupContainerClass, 0, 1, long long int>  GroupOctreeClass;
71 72
    typedef FStarPUAllCpuCapacities<FTestKernels< GroupCellClass, GroupContainerClass >>  GroupKernelClass;
    typedef FStarPUCpuWrapper<typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass> GroupCpuWrapper;
73 74
    typedef FGroupTaskStarPUMpiAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupCpuWrapper> GroupAlgorithm;

75 76 77 78

    FMpi mpiComm(argc, argv);
    // Get params
    const int NbLevels      = FParameters::getValue(argc,argv,FParameterDefinitions::OctreeHeight.options, 5);
Martin Khannouz's avatar
Martin Khannouz committed
79
    const int groupSize      = FParameters::getValue(argc,argv,LocalOptionBlocSize.options, 8);
80 81
    const FSize totalNbParticles = FParameters::getValue(argc,argv,FParameterDefinitions::NbParticles.options, FSize(20));
    const FSize NbParticles   = getNbParticlesPerNode(mpiComm.global().processCount(), mpiComm.global().processId(), totalNbParticles);
82
    // Load the particles
83
    FRandomLoader<FReal> loader(NbParticles, 1.0, FPoint<FReal>(0,0,0), mpiComm.global().processId());
84 85 86 87
    FAssertLF(loader.isOpen());

    // Fill the particles
    struct TestParticle{
88 89
        FPoint<FReal> position;
        const FPoint<FReal>& getPosition(){
90 91
            return position;
        }
92 93 94 95 96 97 98 99 100
		const unsigned int getWriteDataSize(void) const {
			return sizeof(FReal);
		}
		const unsigned int getWriteDataNumber(void) const {
			return 3;
		}
		const FReal* getPtrFirstData(void) const {
			return position.data();
		}
101 102 103
    };
    std::unique_ptr<TestParticle[]> particles(new TestParticle[loader.getNumberOfParticles()]);
    memset(particles.get(), 0, sizeof(TestParticle) * loader.getNumberOfParticles());
104
    for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
105 106
		//loader.fillParticleAtMortonIndex(&(particles[idxPart].position), mpiComm.global().processId()*NbParticles + idxPart,NbLevels);
		loader.fillParticle(&(particles[idxPart].position));
107 108 109 110
    }
    // Sort in parallel
    FVector<TestParticle> myParticles;
    FLeafBalance balancer;
111
    FMpiTreeBuilder<FReal, TestParticle >::DistributeArrayToContainer(mpiComm.global(),
112 113 114 115 116 117 118 119
                                                                particles.get(),
                                                                loader.getNumberOfParticles(),
                                                                loader.getCenterOfBox(),
                                                                loader.getBoxWidth(),
                                                                NbLevels,
                                                                &myParticles,
                                                                &balancer);

BRAMAS Berenger's avatar
BRAMAS Berenger committed
120
    FTestParticleContainer<FReal> allParticles;
121
    for(FSize idxPart = 0 ; idxPart < myParticles.getSize() ; ++idxPart){
122 123 124 125
        allParticles.push(myParticles[idxPart].position);
    }

    // Each proc need to know the righest morton index
126
    const FTreeCoordinate host = FCoordinateComputer::GetCoordinateFromPosition<FReal>(
127 128 129 130
                loader.getCenterOfBox(),
                loader.getBoxWidth(),
                NbLevels,
                myParticles[myParticles.getSize()-1].position );
131
    const MortonIndex myLeftLimite = host.getMortonIndex();
132 133 134 135 136 137 138
    MortonIndex leftLimite = -1;
    if(mpiComm.global().processId() != 0){
        FMpi::Assert(MPI_Recv(&leftLimite, sizeof(leftLimite), MPI_BYTE,
                              mpiComm.global().processId()-1, 0,
                              mpiComm.global().getComm(), MPI_STATUS_IGNORE), __LINE__);
    }
    if(mpiComm.global().processId() != mpiComm.global().processCount()-1){
BRAMAS Berenger's avatar
BRAMAS Berenger committed
139
        FMpi::Assert(MPI_Send(const_cast<MortonIndex*>(&myLeftLimite), sizeof(myLeftLimite), MPI_BYTE,
140 141 142
                              mpiComm.global().processId()+1, 0,
                              mpiComm.global().getComm()), __LINE__);
    }
143

144 145 146 147 148 149
    // Put the data into the tree
    GroupOctreeClass groupedTree(NbLevels, loader.getBoxWidth(), loader.getCenterOfBox(), groupSize,
                                 &allParticles, true, leftLimite);

    // Run the algorithm
    GroupKernelClass groupkernel;
150
    GroupAlgorithm groupalgo(mpiComm.global(), &groupedTree,&groupkernel);
151
	mpiComm.global().barrier();
Martin Khannouz's avatar
Martin Khannouz committed
152
	FTic timerExecute;
Martin Khannouz's avatar
Martin Khannouz committed
153
    groupalgo.execute();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
154
    mpiComm.global().barrier();
155
	double elapsedTime = timerExecute.tacAndElapsed();
156
	timeAverage(mpiComm.global().processId(), mpiComm.global().processCount(), elapsedTime);
BRAMAS Berenger's avatar
BRAMAS Berenger committed
157

158
    groupedTree.forEachCellLeaf<GroupContainerClass>([&](GroupCellClass cell, GroupContainerClass* leaf){
159
        const FSize nbPartsInLeaf = leaf->getNbParticles();
160
        const long long int* dataDown = leaf->getDataDown();
161
        for(FSize idxPart = 0 ; idxPart < nbPartsInLeaf ; ++idxPart){
162
            if(dataDown[idxPart] != totalNbParticles-1){
163
                std::cout << "[Full] Error a particle has " << dataDown[idxPart] << " (it should be " << (totalNbParticles-1) << ") at index " << cell.getMortonIndex() << "\n";
164 165 166 167
            }
        }
    });

168

169
    mpiComm.global().barrier();
BRAMAS Berenger's avatar
BRAMAS Berenger committed
170

171
    typedef FTestCell                   CellClass;
172 173 174
    typedef FTestParticleContainer<FReal>      ContainerClass;
    typedef FSimpleLeaf<FReal, ContainerClass >                     LeafClass;
    typedef FOctree<FReal, CellClass, ContainerClass , LeafClass >  OctreeClass;
175 176 177
    typedef FTestKernels< CellClass, ContainerClass >         KernelClass;
    typedef FFmmAlgorithm<OctreeClass, CellClass, ContainerClass, KernelClass, LeafClass >     FmmClass;

178 179 180 181
    {
        // Usual octree
        OctreeClass tree(NbLevels, 2, loader.getBoxWidth(), loader.getCenterOfBox());
        for(int idxProc = 0 ; idxProc < mpiComm.global().processCount() ; ++idxProc){
182
            FRandomLoader<FReal> loaderAll(getNbParticlesPerNode(mpiComm.global().processCount(), idxProc, totalNbParticles), 1.0, FPoint<FReal>(0,0,0), idxProc);
183
            for(FSize idxPart = 0 ; idxPart < loaderAll.getNumberOfParticles() ; ++idxPart){
184
                FPoint<FReal> pos;
185 186
				loaderAll.fillParticle(&pos);
				//loaderAll.fillParticleAtMortonIndex(&pos, idxProc*NbParticles + idxPart,NbLevels);
187 188
                tree.insert(pos);
            }
189
		}
190 191 192 193 194 195
        // Usual algorithm
        KernelClass kernels;            // FTestKernels FBasicKernels
        FmmClass algo(&tree,&kernels);  //FFmmAlgorithm FFmmAlgorithmThread
        algo.execute();

        // Compare the results
196 197
        groupedTree.forEachCellWithLevel([&](GroupCellClass gcell, const int level){
            const CellClass* cell = tree.getCell(gcell.getMortonIndex(), level);
198
            if(cell == nullptr){
199
                std::cout << "[Empty] Error cell should not exist " << gcell.getMortonIndex() << "\n";
200 201
            }
            else {
202 203
                if(gcell.getDataUp() != cell->getDataUp()){
                    std::cout << "[Up] Up is different at index " << gcell.getMortonIndex() << " level " << level << " is " << gcell.getDataUp() << " should be " << cell->getDataUp() << "\n";
204
                }
205 206
                if(gcell.getDataDown() != cell->getDataDown()){
                    std::cout << "[Down] Down is different at index " << gcell.getMortonIndex() << " level " << level << " is " << gcell.getDataDown() << " should be " << cell->getDataDown() << "\n";
207 208 209 210
                }
            }
        });
    }
211

212 213
    return 0;
}
214 215 216 217 218 219 220 221 222
void timeAverage(int mpi_rank, int nproc, double elapsedTime)
{
	if(mpi_rank == 0)
	{
		double sumElapsedTime = elapsedTime;
		for(int i = 1; i < nproc; ++i)
		{
			double tmp;
			MPI_Recv(&tmp, 1, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, 0);
223 224
			if(tmp > sumElapsedTime)
				sumElapsedTime = tmp;
225
		}
226
		std::cout << "Average time per node (implicit Cheby) : " << sumElapsedTime << "s" << std::endl;
227 228 229 230 231
	}
	else
	{
		MPI_Send(&elapsedTime, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
	}
232
	MPI_Barrier(MPI_COMM_WORLD);
233
}
234 235 236 237 238
FSize getNbParticlesPerNode(FSize mpi_count, FSize mpi_rank, FSize total){
	if(mpi_rank < (total%mpi_count))
		return ((total - (total%mpi_count))/mpi_count)+1;
	return ((total - (total%mpi_count))/mpi_count);
}