testBlockedMpiChebyshev.cpp 17.2 KB
Newer Older
1 2 3 4
// ==== CMAKE =====
// @FUSE_BLAS
// ================
// Keep in private GIT
5 6
// @FUSE_MPI
// @FUSE_STARPU
7 8 9 10


#include "../../Src/Utils/FGlobal.hpp"

11
#include "../../Src/GroupTree/Core/FGroupTree.hpp"
12 13 14 15 16 17 18 19

#include "../../Src/Components/FSimpleLeaf.hpp"
#include "../../Src/Containers/FVector.hpp"

#include "../../Src/Kernels/P2P/FP2PParticleContainer.hpp"

#include "../../Src/Kernels/Chebyshev/FChebSymKernel.hpp"
#include "../../Src/Kernels/Chebyshev/FChebCell.hpp"
20
#include "../../Src/GroupTree/Chebyshev/FChebCellPOD.hpp"
21 22 23 24 25 26 27 28 29
#include "Kernels/Interpolation/FInterpMatrixKernel.hpp"

#include "../../Src/Utils/FMath.hpp"
#include "../../Src/Utils/FMemUtils.hpp"
#include "../../Src/Utils/FParameters.hpp"

#include "../../Src/Files/FRandomLoader.hpp"
#include "../../Src/Files/FFmaGenericLoader.hpp"

30 31 32 33
#include "../../Src/GroupTree/Core/FGroupSeqAlgorithm.hpp"
#include "../../Src/GroupTree/Core/FGroupTaskAlgorithm.hpp"
#include "../../Src/GroupTree/Core/FGroupTaskStarpuAlgorithm.hpp"
#include "../../Src/GroupTree/Core/FP2PGroupParticleContainer.hpp"
34 35 36 37 38 39 40

#include "../../Src/Utils/FParameterNames.hpp"

#include "../../Src/Components/FTestParticleContainer.hpp"
#include "../../Src/Components/FTestCell.hpp"
#include "../../Src/Components/FTestKernels.hpp"

41
#include "../../Src/Core/FFmmAlgorithmThreadProc.hpp"
42
#include "../../Src/Files/FMpiTreeBuilder.hpp"
43
#include "../../Src/GroupTree/Core/FGroupTaskStarpuMpiAlgorithm.hpp"
44 45

#include "../../Src/Files/FMpiFmaGenericLoader.hpp"
46
#include "../../Src/Containers/FCoordinateComputer.hpp"
47

48
#include "../../Src/GroupTree/StarPUUtils/FStarPUKernelCapacities.hpp"
49

50 51
#include <memory>

52
void timeAverage(int mpi_rank, int nproc, double elapsedTime);
53 54 55 56 57 58

int main(int argc, char* argv[]){
    const FParameterNames LocalOptionBlocSize { {"-bs"}, "The size of the block of the blocked tree"};
    const FParameterNames LocalOptionNoValidate { {"-no-validation"}, "To avoid comparing with direct computation"};
    FHelpDescribeAndExit(argc, argv, "Test the blocked tree by counting the particles.",
                         FParameterDefinitions::OctreeHeight,FParameterDefinitions::InputFile,
59
                         FParameterDefinitions::OctreeSubHeight, FParameterDefinitions::NbParticles,
60 61
                         LocalOptionBlocSize, LocalOptionNoValidate);

62
    typedef double FReal;
63 64
    // Initialize the types
    static const int ORDER = 6;
65
    typedef FInterpMatrixKernelR<FReal> MatrixKernelClass;
66 67

    typedef FChebCellPODCore         GroupCellSymbClass;
68 69 70
    typedef FChebCellPODPole<FReal,ORDER>  GroupCellUpClass;
    typedef FChebCellPODLocal<FReal,ORDER> GroupCellDownClass;
    typedef FChebCellPOD<FReal,ORDER>      GroupCellClass;
71 72


73 74
    typedef FP2PGroupParticleContainer<FReal>          GroupContainerClass;
    typedef FGroupTree< FReal, GroupCellClass, GroupCellSymbClass, GroupCellUpClass, GroupCellDownClass, GroupContainerClass, 1, 4, FReal>  GroupOctreeClass;
75

76
    typedef FStarPUAllCpuCapacities<FChebSymKernel<FReal,GroupCellClass,GroupContainerClass,MatrixKernelClass,ORDER>> GroupKernelClass;
77 78
    typedef FStarPUCpuWrapper<typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass> GroupCpuWrapper;
    typedef FGroupTaskStarPUMpiAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupCpuWrapper> GroupAlgorithm;
79 80 81 82 83 84 85 86 87

    // Get params
    FTic timer;
    const int groupSize     = FParameters::getValue(argc,argv,LocalOptionBlocSize.options, 250);

    FMpi mpiComm(argc,argv);

    const unsigned int TreeHeight    = FParameters::getValue(argc, argv, FParameterDefinitions::OctreeHeight.options, 5);
    const unsigned int SubTreeHeight = FParameters::getValue(argc, argv, FParameterDefinitions::OctreeSubHeight.options, 2);
88 89
    const FSize NbParticles   = FParameters::getValue(argc,argv,FParameterDefinitions::NbParticles.options, FSize(20));
    const FSize totalNbParticles = (NbParticles*mpiComm.global().processCount());
90 91 92

    // init particles position and physical value
    struct TestParticle{
93
        FPoint<FReal> position;
94
        FReal physicalValue;
95
        const FPoint<FReal>& getPosition(){
96 97
            return position;
        }
98 99 100 101 102 103 104 105 106
		const unsigned int getWriteDataSize(void) const {
			return sizeof(FReal);
		}
		const unsigned int getWriteDataNumber(void) const {
			return 3;
		}
		const FReal* getPtrFirstData(void) const {
			return position.data();
		}
107 108 109
    };

    // open particle file
110
    FRandomLoader<FReal> loader(NbParticles, 1.0, FPoint<FReal>(0,0,0), mpiComm.global().processId());
111 112
    FAssertLF(loader.isOpen());

113 114 115 116 117
    TestParticle* allParticles = new TestParticle[loader.getNumberOfParticles()];
    memset(allParticles,0,(unsigned int) (sizeof(TestParticle)* loader.getNumberOfParticles()));
    for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
        loader.fillParticle(&allParticles[idxPart].position);
		allParticles[idxPart].physicalValue = 0.1;
118 119 120 121
    }

    FVector<TestParticle> myParticles;
    FLeafBalance balancer;
122
    FMpiTreeBuilder< FReal,TestParticle >::DistributeArrayToContainer(mpiComm.global(),allParticles,
123
                                                                loader.getNumberOfParticles(),
124 125 126 127
                                                                loader.getCenterOfBox(),
                                                                loader.getBoxWidth(),TreeHeight,
                                                                &myParticles, &balancer);

128
    //std::cout << "\tHeight : " << TreeHeight << " \t sub-height : " << SubTreeHeight << std::endl;
129 130

    // Each proc need to know the righest morton index
131
    const FTreeCoordinate host = FCoordinateComputer::GetCoordinateFromPosition<FReal>(
132 133 134 135
                loader.getCenterOfBox(),
                loader.getBoxWidth(),
                TreeHeight,
                myParticles[myParticles.getSize()-1].position );
136
    const MortonIndex myLeftLimite = host.getMortonIndex();
137 138 139 140 141 142 143
    MortonIndex leftLimite = -1;
    if(mpiComm.global().processId() != 0){
        FMpi::Assert(MPI_Recv(&leftLimite, sizeof(leftLimite), MPI_BYTE,
                              mpiComm.global().processId()-1, 0,
                              mpiComm.global().getComm(), MPI_STATUS_IGNORE), __LINE__);
    }
    if(mpiComm.global().processId() != mpiComm.global().processCount()-1){
144
        FMpi::Assert(MPI_Send(const_cast<MortonIndex*>(&myLeftLimite), sizeof(myLeftLimite), MPI_BYTE,
145 146 147 148 149 150
                              mpiComm.global().processId()+1, 0,
                              mpiComm.global().getComm()), __LINE__);
    }
    FLOG(std::cout << "My last index is " << leftLimite << "\n");
    FLOG(std::cout << "My left limite is " << myLeftLimite << "\n");

151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191
	//Save particles in a file
	if(mpiComm.global().processId() == 0){
		std::cerr << "Exchange particle to create the file" << std::endl;
		std::vector<TestParticle*> particlesGathered;
		std::vector<int> sizeGathered;
		
		//Ajout des mes particules
		int sizeofParticle = sizeof(TestParticle)*myParticles.getSize();
		sizeGathered.push_back(sizeofParticle);
		particlesGathered.push_back(new TestParticle[sizeofParticle]);
		memcpy(particlesGathered.back(), myParticles.data(), sizeofParticle);
		//Recupération des particules des autres
		for(int i = 1; i < mpiComm.global().processCount(); ++i)
		{
			int sizeReceive;
			MPI_Recv(&sizeReceive, sizeof(sizeReceive), MPI_BYTE, i, 0, mpiComm.global().getComm(), MPI_STATUS_IGNORE);
			sizeGathered.push_back(sizeReceive);
			particlesGathered.push_back(new TestParticle[sizeReceive]);
			MPI_Recv(particlesGathered.back(), sizeReceive, MPI_BYTE, i, 0, mpiComm.global().getComm(), MPI_STATUS_IGNORE);
		}
		int sum = 0;
		for(int a : sizeGathered)
			sum += a/sizeof(TestParticle);
		if(sum != totalNbParticles)
			std::cerr << "Erreur sum : " << sum << " instead of " << totalNbParticles << std::endl;
		//Store in that bloody file
		FFmaGenericWriter<FReal> writer("canard.fma");
		writer.writeHeader(loader.getCenterOfBox(), loader.getBoxWidth(),totalNbParticles, allParticles[0]);
		for(unsigned int i = 0; i < particlesGathered.size(); ++i)
			writer.writeArrayOfParticles(particlesGathered[i], sizeGathered[i]/sizeof(TestParticle));
		for(TestParticle* ptr : particlesGathered)
			delete ptr;
		std::cerr << "Done exchanging !" << std::endl;
	}
	else{
		int sizeofParticle = sizeof(TestParticle)*myParticles.getSize();
		MPI_Send(&sizeofParticle, sizeof(sizeofParticle), MPI_BYTE, 0, 0, mpiComm.global().getComm());//Send size
		MPI_Send(myParticles.data(), sizeofParticle, MPI_BYTE, 0, 0, mpiComm.global().getComm());
		MPI_Send(const_cast<MortonIndex*>(&leftLimite), sizeof(leftLimite), MPI_BYTE, 0, 0, mpiComm.global().getComm());
		MPI_Send(const_cast<MortonIndex*>(&myLeftLimite), sizeof(myLeftLimite), MPI_BYTE, 0, 0, mpiComm.global().getComm());
	}
192 193

    // Put the data into the tree
194
    FP2PParticleContainer<FReal> myParticlesInContainer;
195
    for(FSize idxPart = 0 ; idxPart < myParticles.getSize() ; ++idxPart){
196 197
        myParticlesInContainer.push(myParticles[idxPart].position,
                                    myParticles[idxPart].physicalValue);
198 199 200
    }
    GroupOctreeClass groupedTree(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(), groupSize,
                                 &myParticlesInContainer, true, leftLimite);
201
    //groupedTree.printInfoBlocks();
202
    timer.tac();
203
	//std::cerr << "Done  " << "(@Creating and Inserting Particles = " << timer.elapsed() << "s)." << std::endl;
204 205

    { // -----------------------------------------------------
206
        //std::cout << "\nChebyshev FMM (ORDER="<< ORDER << ") ... " << std::endl;
207

208
        const MatrixKernelClass MatrixKernel;
209 210 211
        // Create Matrix Kernel
        GroupKernelClass groupkernel(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(), &MatrixKernel);
        // Run the algorithm
212
        GroupAlgorithm groupalgo(mpiComm.global(), &groupedTree,&groupkernel);
Martin Khannouz's avatar
Martin Khannouz committed
213
        timer.tic();
214 215 216
        groupalgo.execute();

        timer.tac();
217 218
		timeAverage(mpiComm.global().processId(), mpiComm.global().processCount(), timer.elapsed());
        //std::cout << "Done  " << "(@Algorithm = " << timer.elapsed() << "s)." << std::endl;
219 220 221 222
    } // -----------------------------------------------------


    if(FParameters::existParameter(argc, argv, LocalOptionNoValidate.options) == false){
223 224 225 226 227
        typedef FP2PParticleContainer<FReal> ContainerClass;
        typedef FSimpleLeaf<FReal, ContainerClass >  LeafClass;
        typedef FChebCell<FReal,ORDER> CellClass;
        typedef FOctree<FReal, CellClass,ContainerClass,LeafClass> OctreeClass;
        typedef FChebSymKernel<FReal,CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass;
228 229 230 231 232 233
        typedef FFmmAlgorithmThreadProc<OctreeClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass;

        const FReal epsi = 1E-10;

        OctreeClass treeCheck(TreeHeight, SubTreeHeight,loader.getBoxWidth(),loader.getCenterOfBox());

234
        for(FSize idxPart = 0 ; idxPart < myParticles.getSize() ; ++idxPart){
235 236 237 238
            // put in tree
            treeCheck.insert(myParticles[idxPart].position,
                             myParticles[idxPart].physicalValue);
        }
239

240
        MatrixKernelClass MatrixKernel;
241 242 243
        KernelClass kernels(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(), &MatrixKernel);
        FmmClass algorithm(mpiComm.global(),&treeCheck, &kernels);
        algorithm.execute();
244
        //std::cout << "Algo is over" << std::endl;
245

246 247
        groupedTree.forEachCellWithLevel([&](GroupCellClass gcell, const int level){
            const CellClass* cell = treeCheck.getCell(gcell.getMortonIndex(), level);
248
            if(cell == nullptr){
249
                std::cout << "[Empty] Error cell should exist " << gcell.getMortonIndex() << "\n";
250 251
            }
            else {
252
                FMath::FAccurater<FReal> diffUp;
253
                diffUp.add(cell->getMultipole(0), gcell.getMultipole(0), gcell.getVectorSize());
254
                if(diffUp.getRelativeInfNorm() > epsi || diffUp.getRelativeL2Norm() > epsi){
255
                    std::cout << "[Up] Up is different at index " << gcell.getMortonIndex() << " level " << level << " is " << diffUp << "\n";
256
                }
257
                FMath::FAccurater<FReal> diffDown;
258
                diffDown.add(cell->getLocal(0), gcell.getLocal(0), gcell.getVectorSize());
259
                if(diffDown.getRelativeInfNorm() > epsi || diffDown.getRelativeL2Norm() > epsi){
260
                    std::cout << "[Up] Down is different at index " << gcell.getMortonIndex() << " level " << level << " is " << diffDown << "\n";
261 262
                }
            }
263 264
        });

265
        groupedTree.forEachCellLeaf<FP2PGroupParticleContainer<FReal> >([&](GroupCellClass gcell, FP2PGroupParticleContainer<FReal> * leafTarget){
266
            const ContainerClass* targets = treeCheck.getLeafSrc(gcell.getMortonIndex());
267
            if(targets == nullptr){
268
                std::cout << "[Empty] Error leaf should exist " << gcell.getMortonIndex() << "\n";
269
            }
270 271 272 273
            else{
                const FReal*const gposX = leafTarget->getPositions()[0];
                const FReal*const gposY = leafTarget->getPositions()[1];
                const FReal*const gposZ = leafTarget->getPositions()[2];
274
                const FSize gnbPartsInLeafTarget = leafTarget->getNbParticles();
275 276 277 278 279 280 281 282
                const FReal*const gforceX = leafTarget->getForcesX();
                const FReal*const gforceY = leafTarget->getForcesY();
                const FReal*const gforceZ = leafTarget->getForcesZ();
                const FReal*const gpotential = leafTarget->getPotentials();

                const FReal*const posX = targets->getPositions()[0];
                const FReal*const posY = targets->getPositions()[1];
                const FReal*const posZ = targets->getPositions()[2];
283
                const FSize nbPartsInLeafTarget = targets->getNbParticles();
284 285 286 287 288 289
                const FReal*const forceX = targets->getForcesX();
                const FReal*const forceY = targets->getForcesY();
                const FReal*const forceZ = targets->getForcesZ();
                const FReal*const potential = targets->getPotentials();

                if(gnbPartsInLeafTarget != nbPartsInLeafTarget){
290
                    std::cout << "[Empty] Not the same number of particles at " << gcell.getMortonIndex()
291 292 293
                              << " gnbPartsInLeafTarget " << gnbPartsInLeafTarget << " nbPartsInLeafTarget " << nbPartsInLeafTarget << "\n";
                }
                else{
294 295
                    FMath::FAccurater<FReal> potentialDiff;
                    FMath::FAccurater<FReal> fx, fy, fz;
296
                    for(FSize idxPart = 0 ; idxPart < nbPartsInLeafTarget ; ++idxPart){
297 298
                        if(gposX[idxPart] != posX[idxPart] || gposY[idxPart] != posY[idxPart]
                                || gposZ[idxPart] != posZ[idxPart]){
299
                            std::cout << "[Empty] Not the same particlea at " << gcell.getMortonIndex() << " idx " << idxPart
300 301 302 303 304 305 306 307 308 309 310
                                      << gposX[idxPart] << " " << posX[idxPart] << " " << gposY[idxPart] << " " << posY[idxPart]
                                      << " " << gposZ[idxPart] << " " << posZ[idxPart] << "\n";
                        }
                        else{
                            potentialDiff.add(potential[idxPart], gpotential[idxPart]);
                            fx.add(forceX[idxPart], gforceX[idxPart]);
                            fy.add(forceY[idxPart], gforceY[idxPart]);
                            fz.add(forceZ[idxPart], gforceZ[idxPart]);
                        }
                    }
                    if(potentialDiff.getRelativeInfNorm() > epsi || potentialDiff.getRelativeL2Norm() > epsi){
311
                        std::cout << "[Up] potentialDiff is different at index " << gcell.getMortonIndex() << " is " << potentialDiff << "\n";
312 313
                    }
                    if(fx.getRelativeInfNorm() > epsi || fx.getRelativeL2Norm() > epsi){
314
                        std::cout << "[Up] fx is different at index " << gcell.getMortonIndex() << " is " << fx << "\n";
315 316
                    }
                    if(fy.getRelativeInfNorm() > epsi || fy.getRelativeL2Norm() > epsi){
317
                        std::cout << "[Up] fy is different at index " << gcell.getMortonIndex() << " is " << fy << "\n";
318 319
                    }
                    if(fz.getRelativeInfNorm() > epsi || fz.getRelativeL2Norm() > epsi){
320
                        std::cout << "[Up] fz is different at index " << gcell.getMortonIndex() << " is " << fz << "\n";
321 322
                    }
                }
323 324 325
            }
        });

326
        //std::cout << "Comparing is over" << std::endl;
327 328 329 330 331 332
    }

    return 0;
}


333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354
void timeAverage(int mpi_rank, int nproc, double elapsedTime)
{
	if(mpi_rank == 0)
	{
		double sumElapsedTime = elapsedTime;
		std::cout << "Executing time node 0 (explicit Cheby) : " << sumElapsedTime << "s" << std::endl;
		for(int i = 1; i < nproc; ++i)
		{
			double tmp;
			MPI_Recv(&tmp, 1, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, 0);
			sumElapsedTime += tmp;
			std::cout << "Executing time node " << i << " (explicit Cheby) : " << tmp << "s" << std::endl;
		}
		sumElapsedTime = sumElapsedTime / (double)nproc;
		std::cout << "Average time per node (explicit Cheby) : " << sumElapsedTime << "s" << std::endl;
	}
	else
	{
		MPI_Send(&elapsedTime, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
	}
	MPI_Barrier(MPI_COMM_WORLD);
}