testBlockedMpiChebyshev.cpp 17.5 KB
Newer Older
1 2 3 4
// ==== CMAKE =====
// @FUSE_BLAS
// ================
// Keep in private GIT
5 6
// @FUSE_MPI
// @FUSE_STARPU
7 8 9 10


#include "../../Src/Utils/FGlobal.hpp"

11
#include "../../Src/GroupTree/Core/FGroupTree.hpp"
12 13 14 15 16 17 18 19

#include "../../Src/Components/FSimpleLeaf.hpp"
#include "../../Src/Containers/FVector.hpp"

#include "../../Src/Kernels/P2P/FP2PParticleContainer.hpp"

#include "../../Src/Kernels/Chebyshev/FChebSymKernel.hpp"
#include "../../Src/Kernels/Chebyshev/FChebCell.hpp"
20
#include "../../Src/GroupTree/Chebyshev/FChebCellPOD.hpp"
21 22 23 24 25 26 27 28 29
#include "Kernels/Interpolation/FInterpMatrixKernel.hpp"

#include "../../Src/Utils/FMath.hpp"
#include "../../Src/Utils/FMemUtils.hpp"
#include "../../Src/Utils/FParameters.hpp"

#include "../../Src/Files/FRandomLoader.hpp"
#include "../../Src/Files/FFmaGenericLoader.hpp"

30 31 32 33
#include "../../Src/GroupTree/Core/FGroupSeqAlgorithm.hpp"
#include "../../Src/GroupTree/Core/FGroupTaskAlgorithm.hpp"
#include "../../Src/GroupTree/Core/FGroupTaskStarpuAlgorithm.hpp"
#include "../../Src/GroupTree/Core/FP2PGroupParticleContainer.hpp"
34 35 36 37 38 39 40

#include "../../Src/Utils/FParameterNames.hpp"

#include "../../Src/Components/FTestParticleContainer.hpp"
#include "../../Src/Components/FTestCell.hpp"
#include "../../Src/Components/FTestKernels.hpp"

41
#include "../../Src/Core/FFmmAlgorithmThreadProc.hpp"
42
#include "../../Src/Files/FMpiTreeBuilder.hpp"
43
#include "../../Src/GroupTree/Core/FGroupTaskStarpuMpiAlgorithm.hpp"
44 45

#include "../../Src/Files/FMpiFmaGenericLoader.hpp"
46
#include "../../Src/Files/FGenerateDistribution.hpp"
47
#include "../../Src/Containers/FCoordinateComputer.hpp"
48

49
#include "../../Src/GroupTree/StarPUUtils/FStarPUKernelCapacities.hpp"
50

51 52
#include <memory>

53
void timeAverage(int mpi_rank, int nproc, double elapsedTime);
54
FSize getNbParticlesPerNode(FSize mpi_count, FSize mpi_rank, FSize total);
55 56 57 58

int main(int argc, char* argv[]){
    const FParameterNames LocalOptionBlocSize { {"-bs"}, "The size of the block of the blocked tree"};
    const FParameterNames LocalOptionNoValidate { {"-no-validation"}, "To avoid comparing with direct computation"};
59 60
    const FParameterNames LocalOptionEllipsoid = {{"-ellipsoid"} , " non uniform distribution on  an ellipsoid of aspect ratio given by 0.5:0.25:0.125"};
    const FParameterNames LocalOptionPlummer = {{"-plummer"} , " (Highly non uniform) plummer distribution (astrophysics)"};
61 62
    FHelpDescribeAndExit(argc, argv, "Test the blocked tree by counting the particles.",
                         FParameterDefinitions::OctreeHeight,FParameterDefinitions::InputFile,
63
                         FParameterDefinitions::OctreeSubHeight, FParameterDefinitions::NbParticles,
64
                         LocalOptionBlocSize, LocalOptionNoValidate, LocalOptionEllipsoid, LocalOptionPlummer);
65

66
    typedef double FReal;
67
    // Initialize the types
68
    static const int ORDER = 6;
69
    typedef FInterpMatrixKernelR<FReal> MatrixKernelClass;
70 71

    typedef FChebCellPODCore         GroupCellSymbClass;
72 73 74
    typedef FChebCellPODPole<FReal,ORDER>  GroupCellUpClass;
    typedef FChebCellPODLocal<FReal,ORDER> GroupCellDownClass;
    typedef FChebCellPOD<FReal,ORDER>      GroupCellClass;
75 76


77 78
    typedef FP2PGroupParticleContainer<FReal>          GroupContainerClass;
    typedef FGroupTree< FReal, GroupCellClass, GroupCellSymbClass, GroupCellUpClass, GroupCellDownClass, GroupContainerClass, 1, 4, FReal>  GroupOctreeClass;
79

80
    typedef FStarPUAllCpuCapacities<FChebSymKernel<FReal,GroupCellClass,GroupContainerClass,MatrixKernelClass,ORDER>> GroupKernelClass;
81 82
    typedef FStarPUCpuWrapper<typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass> GroupCpuWrapper;
    typedef FGroupTaskStarPUMpiAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupCpuWrapper> GroupAlgorithm;
83 84 85 86 87 88 89 90 91

    // Get params
    FTic timer;
    const int groupSize     = FParameters::getValue(argc,argv,LocalOptionBlocSize.options, 250);

    FMpi mpiComm(argc,argv);

    const unsigned int TreeHeight    = FParameters::getValue(argc, argv, FParameterDefinitions::OctreeHeight.options, 5);
    const unsigned int SubTreeHeight = FParameters::getValue(argc, argv, FParameterDefinitions::OctreeSubHeight.options, 2);
92 93
    const FSize totalNbParticles = FParameters::getValue(argc,argv,FParameterDefinitions::NbParticles.options, FSize(20));
    const FSize NbParticles   = getNbParticlesPerNode(mpiComm.global().processCount(), mpiComm.global().processId(), totalNbParticles);
94 95 96

    // init particles position and physical value
    struct TestParticle{
97
        FPoint<FReal> position;
98
        FReal physicalValue;
99
        const FPoint<FReal>& getPosition(){
100 101
            return position;
        }
102 103 104 105 106 107 108 109 110
		const unsigned int getWriteDataSize(void) const {
			return sizeof(FReal);
		}
		const unsigned int getWriteDataNumber(void) const {
			return 3;
		}
		const FReal* getPtrFirstData(void) const {
			return position.data();
		}
111 112
    };

113
//#define LOAD_FILE
114
#ifndef LOAD_FILE
115 116
	FReal boxWidth = 1.0;
    FRandomLoader<FReal> loader(NbParticles, boxWidth, FPoint<FReal>(0,0,0), mpiComm.global().processId());
117 118
    FAssertLF(loader.isOpen());

119
	setSeed(mpiComm.global().processId());
120
    TestParticle* allParticles = new TestParticle[loader.getNumberOfParticles()];
121
	FReal * tmpParticles = new FReal[4*loader.getNumberOfParticles()];
122
    memset(allParticles,0,(unsigned int) (sizeof(TestParticle)* loader.getNumberOfParticles()));
123 124 125 126 127 128 129 130 131 132 133 134
	memset(tmpParticles,0,(unsigned int) (sizeof(FReal)* loader.getNumberOfParticles() * 4));
	if(FParameters::existParameter(argc, argv, "-ellipsoid")) {
		nonunifRandonPointsOnElipsoid(loader.getNumberOfParticles(), boxWidth/2, boxWidth/4, boxWidth/8, tmpParticles);
	}
	else if(FParameters::existParameter(argc, argv, "-plummer")) {
		//The M argument is not used in the algorithm of the plummer distribution
		unifRandonPlummer(loader.getNumberOfParticles(), boxWidth/2, 0.0, tmpParticles) ;
	}
	else { //Uniform cube
		unifRandonPointsOnCube(loader.getNumberOfParticles(), boxWidth/2, boxWidth/2, boxWidth/2, tmpParticles);
	}

135
    for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
136
		allParticles[idxPart].position.setPosition(tmpParticles[idxPart*4], tmpParticles[idxPart*4+1], tmpParticles[idxPart*4+2]);
137
		allParticles[idxPart].physicalValue = 0.1;
138
    }
139
	delete[] tmpParticles;
140
#else
141
    // open particle file
142 143 144 145 146 147 148 149 150 151 152
    const char* const filename = FParameters::getStr(argc,argv,FParameterDefinitions::InputFile.options, "../Data/test20k.fma");
    FMpiFmaGenericLoader<FReal> loader(filename,mpiComm.global());
    FAssertLF(loader.isOpen());

    TestParticle* allParticles = new TestParticle[loader.getMyNumberOfParticles()];
    memset(allParticles,0,(unsigned int) (sizeof(TestParticle)* loader.getMyNumberOfParticles()));
    for(FSize idxPart = 0 ; idxPart < loader.getMyNumberOfParticles() ; ++idxPart){
        loader.fillParticle(&allParticles[idxPart].position,&allParticles[idxPart].physicalValue);
    }
#endif

153 154
    FVector<TestParticle> myParticles;
    FLeafBalance balancer;
155
    FMpiTreeBuilder< FReal,TestParticle >::DistributeArrayToContainer(mpiComm.global(),allParticles,
156
                                                                loader.getNumberOfParticles(),
157 158 159 160
                                                                loader.getCenterOfBox(),
                                                                loader.getBoxWidth(),TreeHeight,
                                                                &myParticles, &balancer);

161
    //std::cout << "\tHeight : " << TreeHeight << " \t sub-height : " << SubTreeHeight << std::endl;
162 163

    // Each proc need to know the righest morton index
164
    const FTreeCoordinate host = FCoordinateComputer::GetCoordinateFromPosition<FReal>(
165 166 167 168
                loader.getCenterOfBox(),
                loader.getBoxWidth(),
                TreeHeight,
                myParticles[myParticles.getSize()-1].position );
169
    const MortonIndex myLeftLimite = host.getMortonIndex();
170 171 172 173 174 175 176
    MortonIndex leftLimite = -1;
    if(mpiComm.global().processId() != 0){
        FMpi::Assert(MPI_Recv(&leftLimite, sizeof(leftLimite), MPI_BYTE,
                              mpiComm.global().processId()-1, 0,
                              mpiComm.global().getComm(), MPI_STATUS_IGNORE), __LINE__);
    }
    if(mpiComm.global().processId() != mpiComm.global().processCount()-1){
177
        FMpi::Assert(MPI_Send(const_cast<MortonIndex*>(&myLeftLimite), sizeof(myLeftLimite), MPI_BYTE,
178 179 180 181 182 183 184
                              mpiComm.global().processId()+1, 0,
                              mpiComm.global().getComm()), __LINE__);
    }
    FLOG(std::cout << "My last index is " << leftLimite << "\n");
    FLOG(std::cout << "My left limite is " << myLeftLimite << "\n");

    // Put the data into the tree
185
    FP2PParticleContainer<FReal> myParticlesInContainer;
186
    for(FSize idxPart = 0 ; idxPart < myParticles.getSize() ; ++idxPart){
187 188
        myParticlesInContainer.push(myParticles[idxPart].position,
                                    myParticles[idxPart].physicalValue);
189 190 191
    }
    GroupOctreeClass groupedTree(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(), groupSize,
                                 &myParticlesInContainer, true, leftLimite);
192
    
193
    timer.tac();
194
	std::cerr << "Done  " << "(@Creating and Inserting Particles = " << timer.elapsed() << "s)." << std::endl;
195

196
	int operationsToProceed = FFmmP2M | FFmmM2M | FFmmM2L | FFmmL2L | FFmmL2P | FFmmP2P;
197
    { // -----------------------------------------------------
198
        //std::cout << "\nChebyshev FMM (ORDER="<< ORDER << ") ... " << std::endl;
199

200
        const MatrixKernelClass MatrixKernel;
201 202 203
        // Create Matrix Kernel
        GroupKernelClass groupkernel(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(), &MatrixKernel);
        // Run the algorithm
204
        GroupAlgorithm groupalgo(mpiComm.global(), &groupedTree,&groupkernel);
205
		mpiComm.global().barrier();
Martin Khannouz's avatar
Martin Khannouz committed
206
        timer.tic();
207
		starpu_fxt_start_profiling();
208
        groupalgo.execute(operationsToProceed);
209
		mpiComm.global().barrier();
210
		starpu_fxt_stop_profiling();
211
        timer.tac();
212 213
		timeAverage(mpiComm.global().processId(), mpiComm.global().processCount(), timer.elapsed());
        //std::cout << "Done  " << "(@Algorithm = " << timer.elapsed() << "s)." << std::endl;
214 215 216 217
    } // -----------------------------------------------------


    if(FParameters::existParameter(argc, argv, LocalOptionNoValidate.options) == false){
218 219 220 221 222
        typedef FP2PParticleContainer<FReal> ContainerClass;
        typedef FSimpleLeaf<FReal, ContainerClass >  LeafClass;
        typedef FChebCell<FReal,ORDER> CellClass;
        typedef FOctree<FReal, CellClass,ContainerClass,LeafClass> OctreeClass;
        typedef FChebSymKernel<FReal,CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass;
223 224 225 226 227 228
        typedef FFmmAlgorithmThreadProc<OctreeClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass;

        const FReal epsi = 1E-10;

        OctreeClass treeCheck(TreeHeight, SubTreeHeight,loader.getBoxWidth(),loader.getCenterOfBox());

229
        for(FSize idxPart = 0 ; idxPart < myParticles.getSize() ; ++idxPart){
230 231 232 233
            // put in tree
            treeCheck.insert(myParticles[idxPart].position,
                             myParticles[idxPart].physicalValue);
        }
234

235
        MatrixKernelClass MatrixKernel;
236 237
        KernelClass kernels(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(), &MatrixKernel);
        FmmClass algorithm(mpiComm.global(),&treeCheck, &kernels);
238 239
		mpiComm.global().barrier();
        timer.tic();
240
        algorithm.execute(operationsToProceed);
241 242 243
		mpiComm.global().barrier();
        timer.tac();
		timeAverage(mpiComm.global().processId(), mpiComm.global().processCount(), timer.elapsed());
244
        //std::cout << "Algo is over" << std::endl;
245

246 247
        groupedTree.forEachCellWithLevel([&](GroupCellClass gcell, const int level){
            const CellClass* cell = treeCheck.getCell(gcell.getMortonIndex(), level);
248
            if(cell == nullptr){
249
                std::cout << "[Empty] Error cell should exist " << gcell.getMortonIndex() << "\n";
250 251
            }
            else {
252
                FMath::FAccurater<FReal> diffUp;
253
                diffUp.add(cell->getMultipole(0), gcell.getMultipole(0), gcell.getVectorSize());
254
                if(diffUp.getRelativeInfNorm() > epsi || diffUp.getRelativeL2Norm() > epsi){
255
                    std::cout << "[Up] Up is different at index " << gcell.getMortonIndex() << " level " << level << " is " << diffUp << "\n";
256
                }
257
                FMath::FAccurater<FReal> diffDown;
258
                diffDown.add(cell->getLocal(0), gcell.getLocal(0), gcell.getVectorSize());
259
                if(diffDown.getRelativeInfNorm() > epsi || diffDown.getRelativeL2Norm() > epsi){
260
                    std::cout << "[Up] Down is different at index " << gcell.getMortonIndex() << " level " << level << " is " << diffDown << "\n";
261 262
                }
            }
263 264
        });

265
        groupedTree.forEachCellLeaf<FP2PGroupParticleContainer<FReal> >([&](GroupCellClass gcell, FP2PGroupParticleContainer<FReal> * leafTarget){
266
            const ContainerClass* targets = treeCheck.getLeafSrc(gcell.getMortonIndex());
267
            if(targets == nullptr){
268
                std::cout << "[Empty] Error leaf should exist " << gcell.getMortonIndex() << "\n";
269
            }
270 271 272 273
            else{
                const FReal*const gposX = leafTarget->getPositions()[0];
                const FReal*const gposY = leafTarget->getPositions()[1];
                const FReal*const gposZ = leafTarget->getPositions()[2];
274
                const FSize gnbPartsInLeafTarget = leafTarget->getNbParticles();
275 276 277 278 279 280 281 282
                const FReal*const gforceX = leafTarget->getForcesX();
                const FReal*const gforceY = leafTarget->getForcesY();
                const FReal*const gforceZ = leafTarget->getForcesZ();
                const FReal*const gpotential = leafTarget->getPotentials();

                const FReal*const posX = targets->getPositions()[0];
                const FReal*const posY = targets->getPositions()[1];
                const FReal*const posZ = targets->getPositions()[2];
283
                const FSize nbPartsInLeafTarget = targets->getNbParticles();
284 285 286 287 288 289
                const FReal*const forceX = targets->getForcesX();
                const FReal*const forceY = targets->getForcesY();
                const FReal*const forceZ = targets->getForcesZ();
                const FReal*const potential = targets->getPotentials();

                if(gnbPartsInLeafTarget != nbPartsInLeafTarget){
290
                    std::cout << "[Empty] Not the same number of particles at " << gcell.getMortonIndex()
291 292 293
                              << " gnbPartsInLeafTarget " << gnbPartsInLeafTarget << " nbPartsInLeafTarget " << nbPartsInLeafTarget << "\n";
                }
                else{
294 295
                    FMath::FAccurater<FReal> potentialDiff;
                    FMath::FAccurater<FReal> fx, fy, fz;
296
                    for(FSize idxPart = 0 ; idxPart < nbPartsInLeafTarget ; ++idxPart){
297 298
                        if(gposX[idxPart] != posX[idxPart] || gposY[idxPart] != posY[idxPart]
                                || gposZ[idxPart] != posZ[idxPart]){
299
                            std::cout << "[Empty] Not the same particlea at " << gcell.getMortonIndex() << " idx " << idxPart
300 301 302 303 304 305 306 307 308 309 310
                                      << gposX[idxPart] << " " << posX[idxPart] << " " << gposY[idxPart] << " " << posY[idxPart]
                                      << " " << gposZ[idxPart] << " " << posZ[idxPart] << "\n";
                        }
                        else{
                            potentialDiff.add(potential[idxPart], gpotential[idxPart]);
                            fx.add(forceX[idxPart], gforceX[idxPart]);
                            fy.add(forceY[idxPart], gforceY[idxPart]);
                            fz.add(forceZ[idxPart], gforceZ[idxPart]);
                        }
                    }
                    if(potentialDiff.getRelativeInfNorm() > epsi || potentialDiff.getRelativeL2Norm() > epsi){
311
                        std::cout << "[Up] potentialDiff is different at index " << gcell.getMortonIndex() << " is " << potentialDiff << "\n";
312 313
                    }
                    if(fx.getRelativeInfNorm() > epsi || fx.getRelativeL2Norm() > epsi){
314
                        std::cout << "[Up] fx is different at index " << gcell.getMortonIndex() << " is " << fx << "\n";
315 316
                    }
                    if(fy.getRelativeInfNorm() > epsi || fy.getRelativeL2Norm() > epsi){
317
                        std::cout << "[Up] fy is different at index " << gcell.getMortonIndex() << " is " << fy << "\n";
318 319
                    }
                    if(fz.getRelativeInfNorm() > epsi || fz.getRelativeL2Norm() > epsi){
320
                        std::cout << "[Up] fz is different at index " << gcell.getMortonIndex() << " is " << fz << "\n";
321 322
                    }
                }
323 324 325
            }
        });

326
        //std::cout << "Comparing is over" << std::endl;
327 328 329 330 331 332
    }

    return 0;
}


333 334 335 336 337 338 339 340 341
void timeAverage(int mpi_rank, int nproc, double elapsedTime)
{
	if(mpi_rank == 0)
	{
		double sumElapsedTime = elapsedTime;
		for(int i = 1; i < nproc; ++i)
		{
			double tmp;
			MPI_Recv(&tmp, 1, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, 0);
342 343
			if(tmp > sumElapsedTime)
				sumElapsedTime = tmp;
344
		}
345
		std::cout << "Average time per node (implicit Cheby) : " << sumElapsedTime << "s" << std::endl;
346 347 348 349 350 351 352
	}
	else
	{
		MPI_Send(&elapsedTime, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
	}
	MPI_Barrier(MPI_COMM_WORLD);
}
353 354 355 356 357
FSize getNbParticlesPerNode(FSize mpi_count, FSize mpi_rank, FSize total){
	if(mpi_rank < (total%mpi_count))
		return ((total - (total%mpi_count))/mpi_count)+1;
	return ((total - (total%mpi_count))/mpi_count);
}