testBlockedMpiChebyshev.cpp 14.9 KB
Newer Older
1 2 3 4
// ==== CMAKE =====
// @FUSE_BLAS
// ================
// Keep in private GIT
5 6
// @FUSE_MPI
// @FUSE_STARPU
7 8 9 10


#include "../../Src/Utils/FGlobal.hpp"

11
#include "../../Src/GroupTree/Core/FGroupTree.hpp"
12 13 14 15 16 17 18 19

#include "../../Src/Components/FSimpleLeaf.hpp"
#include "../../Src/Containers/FVector.hpp"

#include "../../Src/Kernels/P2P/FP2PParticleContainer.hpp"

#include "../../Src/Kernels/Chebyshev/FChebSymKernel.hpp"
#include "../../Src/Kernels/Chebyshev/FChebCell.hpp"
20
#include "../../Src/GroupTree/Chebyshev/FChebCellPOD.hpp"
21 22 23 24 25 26 27 28 29
#include "Kernels/Interpolation/FInterpMatrixKernel.hpp"

#include "../../Src/Utils/FMath.hpp"
#include "../../Src/Utils/FMemUtils.hpp"
#include "../../Src/Utils/FParameters.hpp"

#include "../../Src/Files/FRandomLoader.hpp"
#include "../../Src/Files/FFmaGenericLoader.hpp"

30 31 32 33
#include "../../Src/GroupTree/Core/FGroupSeqAlgorithm.hpp"
#include "../../Src/GroupTree/Core/FGroupTaskAlgorithm.hpp"
#include "../../Src/GroupTree/Core/FGroupTaskStarpuAlgorithm.hpp"
#include "../../Src/GroupTree/Core/FP2PGroupParticleContainer.hpp"
34 35 36 37 38 39 40

#include "../../Src/Utils/FParameterNames.hpp"

#include "../../Src/Components/FTestParticleContainer.hpp"
#include "../../Src/Components/FTestCell.hpp"
#include "../../Src/Components/FTestKernels.hpp"

41
#include "../../Src/Core/FFmmAlgorithmThreadProc.hpp"
42
#include "../../Src/Files/FMpiTreeBuilder.hpp"
43
#include "../../Src/GroupTree/Core/FGroupTaskStarpuMpiAlgorithm.hpp"
44 45

#include "../../Src/Files/FMpiFmaGenericLoader.hpp"
46
#include "../../Src/Containers/FCoordinateComputer.hpp"
47

48
#include "../../Src/GroupTree/StarPUUtils/FStarPUKernelCapacities.hpp"
49

50 51
#include <memory>

52
void timeAverage(int mpi_rank, int nproc, double elapsedTime);
53 54 55 56 57 58

int main(int argc, char* argv[]){
    const FParameterNames LocalOptionBlocSize { {"-bs"}, "The size of the block of the blocked tree"};
    const FParameterNames LocalOptionNoValidate { {"-no-validation"}, "To avoid comparing with direct computation"};
    FHelpDescribeAndExit(argc, argv, "Test the blocked tree by counting the particles.",
                         FParameterDefinitions::OctreeHeight,FParameterDefinitions::InputFile,
59
                         FParameterDefinitions::OctreeSubHeight, FParameterDefinitions::NbParticles,
60 61
                         LocalOptionBlocSize, LocalOptionNoValidate);

62
    typedef double FReal;
63 64
    // Initialize the types
    static const int ORDER = 6;
65
    typedef FInterpMatrixKernelR<FReal> MatrixKernelClass;
66 67

    typedef FChebCellPODCore         GroupCellSymbClass;
68 69 70
    typedef FChebCellPODPole<FReal,ORDER>  GroupCellUpClass;
    typedef FChebCellPODLocal<FReal,ORDER> GroupCellDownClass;
    typedef FChebCellPOD<FReal,ORDER>      GroupCellClass;
71 72


73 74
    typedef FP2PGroupParticleContainer<FReal>          GroupContainerClass;
    typedef FGroupTree< FReal, GroupCellClass, GroupCellSymbClass, GroupCellUpClass, GroupCellDownClass, GroupContainerClass, 1, 4, FReal>  GroupOctreeClass;
75

76
    typedef FStarPUAllCpuCapacities<FChebSymKernel<FReal,GroupCellClass,GroupContainerClass,MatrixKernelClass,ORDER>> GroupKernelClass;
77 78
    typedef FStarPUCpuWrapper<typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass> GroupCpuWrapper;
    typedef FGroupTaskStarPUMpiAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupCpuWrapper> GroupAlgorithm;
79 80 81 82 83 84 85 86 87

    // Get params
    FTic timer;
    const int groupSize     = FParameters::getValue(argc,argv,LocalOptionBlocSize.options, 250);

    FMpi mpiComm(argc,argv);

    const unsigned int TreeHeight    = FParameters::getValue(argc, argv, FParameterDefinitions::OctreeHeight.options, 5);
    const unsigned int SubTreeHeight = FParameters::getValue(argc, argv, FParameterDefinitions::OctreeSubHeight.options, 2);
88 89
    const FSize NbParticles   = FParameters::getValue(argc,argv,FParameterDefinitions::NbParticles.options, FSize(20));
    const FSize totalNbParticles = (NbParticles*mpiComm.global().processCount());
90 91 92

    // init particles position and physical value
    struct TestParticle{
93
        FPoint<FReal> position;
94
        FReal physicalValue;
95
        const FPoint<FReal>& getPosition(){
96 97
            return position;
        }
98 99 100 101 102 103 104 105 106
		const unsigned int getWriteDataSize(void) const {
			return sizeof(FReal);
		}
		const unsigned int getWriteDataNumber(void) const {
			return 3;
		}
		const FReal* getPtrFirstData(void) const {
			return position.data();
		}
107 108 109
    };

    // open particle file
110
    FRandomLoader<FReal> loader(NbParticles, 1.0, FPoint<FReal>(0,0,0), mpiComm.global().processId());
111 112
    FAssertLF(loader.isOpen());

113 114 115 116 117
    TestParticle* allParticles = new TestParticle[loader.getNumberOfParticles()];
    memset(allParticles,0,(unsigned int) (sizeof(TestParticle)* loader.getNumberOfParticles()));
    for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
        loader.fillParticle(&allParticles[idxPart].position);
		allParticles[idxPart].physicalValue = 0.1;
118 119 120 121
    }

    FVector<TestParticle> myParticles;
    FLeafBalance balancer;
122
    FMpiTreeBuilder< FReal,TestParticle >::DistributeArrayToContainer(mpiComm.global(),allParticles,
123
                                                                loader.getNumberOfParticles(),
124 125 126 127
                                                                loader.getCenterOfBox(),
                                                                loader.getBoxWidth(),TreeHeight,
                                                                &myParticles, &balancer);

128
    //std::cout << "\tHeight : " << TreeHeight << " \t sub-height : " << SubTreeHeight << std::endl;
129 130

    // Each proc need to know the righest morton index
131
    const FTreeCoordinate host = FCoordinateComputer::GetCoordinateFromPosition<FReal>(
132 133 134 135
                loader.getCenterOfBox(),
                loader.getBoxWidth(),
                TreeHeight,
                myParticles[myParticles.getSize()-1].position );
136
    const MortonIndex myLeftLimite = host.getMortonIndex();
137 138 139 140 141 142 143
    MortonIndex leftLimite = -1;
    if(mpiComm.global().processId() != 0){
        FMpi::Assert(MPI_Recv(&leftLimite, sizeof(leftLimite), MPI_BYTE,
                              mpiComm.global().processId()-1, 0,
                              mpiComm.global().getComm(), MPI_STATUS_IGNORE), __LINE__);
    }
    if(mpiComm.global().processId() != mpiComm.global().processCount()-1){
144
        FMpi::Assert(MPI_Send(const_cast<MortonIndex*>(&myLeftLimite), sizeof(myLeftLimite), MPI_BYTE,
145 146 147 148 149 150 151
                              mpiComm.global().processId()+1, 0,
                              mpiComm.global().getComm()), __LINE__);
    }
    FLOG(std::cout << "My last index is " << leftLimite << "\n");
    FLOG(std::cout << "My left limite is " << myLeftLimite << "\n");

    // Put the data into the tree
152
    FP2PParticleContainer<FReal> myParticlesInContainer;
153
    for(FSize idxPart = 0 ; idxPart < myParticles.getSize() ; ++idxPart){
154 155
        myParticlesInContainer.push(myParticles[idxPart].position,
                                    myParticles[idxPart].physicalValue);
156 157 158
    }
    GroupOctreeClass groupedTree(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(), groupSize,
                                 &myParticlesInContainer, true, leftLimite);
159
    //groupedTree.printInfoBlocks();
160
    timer.tac();
161
	//std::cerr << "Done  " << "(@Creating and Inserting Particles = " << timer.elapsed() << "s)." << std::endl;
162 163

    { // -----------------------------------------------------
164
        //std::cout << "\nChebyshev FMM (ORDER="<< ORDER << ") ... " << std::endl;
165

166
        const MatrixKernelClass MatrixKernel;
167 168 169
        // Create Matrix Kernel
        GroupKernelClass groupkernel(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(), &MatrixKernel);
        // Run the algorithm
170
        GroupAlgorithm groupalgo(mpiComm.global(), &groupedTree,&groupkernel);
Martin Khannouz's avatar
Martin Khannouz committed
171
        timer.tic();
172
        groupalgo.execute();
173
		mpiComm.global().barrier();
174
        timer.tac();
175 176
		timeAverage(mpiComm.global().processId(), mpiComm.global().processCount(), timer.elapsed());
        //std::cout << "Done  " << "(@Algorithm = " << timer.elapsed() << "s)." << std::endl;
177 178 179 180
    } // -----------------------------------------------------


    if(FParameters::existParameter(argc, argv, LocalOptionNoValidate.options) == false){
181 182 183 184 185
        typedef FP2PParticleContainer<FReal> ContainerClass;
        typedef FSimpleLeaf<FReal, ContainerClass >  LeafClass;
        typedef FChebCell<FReal,ORDER> CellClass;
        typedef FOctree<FReal, CellClass,ContainerClass,LeafClass> OctreeClass;
        typedef FChebSymKernel<FReal,CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass;
186 187 188 189 190 191
        typedef FFmmAlgorithmThreadProc<OctreeClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass;

        const FReal epsi = 1E-10;

        OctreeClass treeCheck(TreeHeight, SubTreeHeight,loader.getBoxWidth(),loader.getCenterOfBox());

192
        for(FSize idxPart = 0 ; idxPart < myParticles.getSize() ; ++idxPart){
193 194 195 196
            // put in tree
            treeCheck.insert(myParticles[idxPart].position,
                             myParticles[idxPart].physicalValue);
        }
197

198
        MatrixKernelClass MatrixKernel;
199 200 201
        KernelClass kernels(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(), &MatrixKernel);
        FmmClass algorithm(mpiComm.global(),&treeCheck, &kernels);
        algorithm.execute();
202
        //std::cout << "Algo is over" << std::endl;
203

204 205
        groupedTree.forEachCellWithLevel([&](GroupCellClass gcell, const int level){
            const CellClass* cell = treeCheck.getCell(gcell.getMortonIndex(), level);
206
            if(cell == nullptr){
207
                std::cout << "[Empty] Error cell should exist " << gcell.getMortonIndex() << "\n";
208 209
            }
            else {
210
                FMath::FAccurater<FReal> diffUp;
211
                diffUp.add(cell->getMultipole(0), gcell.getMultipole(0), gcell.getVectorSize());
212
                if(diffUp.getRelativeInfNorm() > epsi || diffUp.getRelativeL2Norm() > epsi){
213
                    std::cout << "[Up] Up is different at index " << gcell.getMortonIndex() << " level " << level << " is " << diffUp << "\n";
214
                }
215
                FMath::FAccurater<FReal> diffDown;
216
                diffDown.add(cell->getLocal(0), gcell.getLocal(0), gcell.getVectorSize());
217
                if(diffDown.getRelativeInfNorm() > epsi || diffDown.getRelativeL2Norm() > epsi){
218
                    std::cout << "[Up] Down is different at index " << gcell.getMortonIndex() << " level " << level << " is " << diffDown << "\n";
219 220
                }
            }
221 222
        });

223
        groupedTree.forEachCellLeaf<FP2PGroupParticleContainer<FReal> >([&](GroupCellClass gcell, FP2PGroupParticleContainer<FReal> * leafTarget){
224
            const ContainerClass* targets = treeCheck.getLeafSrc(gcell.getMortonIndex());
225
            if(targets == nullptr){
226
                std::cout << "[Empty] Error leaf should exist " << gcell.getMortonIndex() << "\n";
227
            }
228 229 230 231
            else{
                const FReal*const gposX = leafTarget->getPositions()[0];
                const FReal*const gposY = leafTarget->getPositions()[1];
                const FReal*const gposZ = leafTarget->getPositions()[2];
232
                const FSize gnbPartsInLeafTarget = leafTarget->getNbParticles();
233 234 235 236 237 238 239 240
                const FReal*const gforceX = leafTarget->getForcesX();
                const FReal*const gforceY = leafTarget->getForcesY();
                const FReal*const gforceZ = leafTarget->getForcesZ();
                const FReal*const gpotential = leafTarget->getPotentials();

                const FReal*const posX = targets->getPositions()[0];
                const FReal*const posY = targets->getPositions()[1];
                const FReal*const posZ = targets->getPositions()[2];
241
                const FSize nbPartsInLeafTarget = targets->getNbParticles();
242 243 244 245 246 247
                const FReal*const forceX = targets->getForcesX();
                const FReal*const forceY = targets->getForcesY();
                const FReal*const forceZ = targets->getForcesZ();
                const FReal*const potential = targets->getPotentials();

                if(gnbPartsInLeafTarget != nbPartsInLeafTarget){
248
                    std::cout << "[Empty] Not the same number of particles at " << gcell.getMortonIndex()
249 250 251
                              << " gnbPartsInLeafTarget " << gnbPartsInLeafTarget << " nbPartsInLeafTarget " << nbPartsInLeafTarget << "\n";
                }
                else{
252 253
                    FMath::FAccurater<FReal> potentialDiff;
                    FMath::FAccurater<FReal> fx, fy, fz;
254
                    for(FSize idxPart = 0 ; idxPart < nbPartsInLeafTarget ; ++idxPart){
255 256
                        if(gposX[idxPart] != posX[idxPart] || gposY[idxPart] != posY[idxPart]
                                || gposZ[idxPart] != posZ[idxPart]){
257
                            std::cout << "[Empty] Not the same particlea at " << gcell.getMortonIndex() << " idx " << idxPart
258 259 260 261 262 263 264 265 266 267 268
                                      << gposX[idxPart] << " " << posX[idxPart] << " " << gposY[idxPart] << " " << posY[idxPart]
                                      << " " << gposZ[idxPart] << " " << posZ[idxPart] << "\n";
                        }
                        else{
                            potentialDiff.add(potential[idxPart], gpotential[idxPart]);
                            fx.add(forceX[idxPart], gforceX[idxPart]);
                            fy.add(forceY[idxPart], gforceY[idxPart]);
                            fz.add(forceZ[idxPart], gforceZ[idxPart]);
                        }
                    }
                    if(potentialDiff.getRelativeInfNorm() > epsi || potentialDiff.getRelativeL2Norm() > epsi){
269
                        std::cout << "[Up] potentialDiff is different at index " << gcell.getMortonIndex() << " is " << potentialDiff << "\n";
270 271
                    }
                    if(fx.getRelativeInfNorm() > epsi || fx.getRelativeL2Norm() > epsi){
272
                        std::cout << "[Up] fx is different at index " << gcell.getMortonIndex() << " is " << fx << "\n";
273 274
                    }
                    if(fy.getRelativeInfNorm() > epsi || fy.getRelativeL2Norm() > epsi){
275
                        std::cout << "[Up] fy is different at index " << gcell.getMortonIndex() << " is " << fy << "\n";
276 277
                    }
                    if(fz.getRelativeInfNorm() > epsi || fz.getRelativeL2Norm() > epsi){
278
                        std::cout << "[Up] fz is different at index " << gcell.getMortonIndex() << " is " << fz << "\n";
279 280
                    }
                }
281 282 283
            }
        });

284
        //std::cout << "Comparing is over" << std::endl;
285 286 287 288 289 290
    }

    return 0;
}


291 292 293 294 295 296 297 298 299
void timeAverage(int mpi_rank, int nproc, double elapsedTime)
{
	if(mpi_rank == 0)
	{
		double sumElapsedTime = elapsedTime;
		for(int i = 1; i < nproc; ++i)
		{
			double tmp;
			MPI_Recv(&tmp, 1, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, 0);
300 301
			if(tmp > sumElapsedTime)
				sumElapsedTime = tmp;
302
		}
303
		std::cout << "Average time per node (implicit Cheby) : " << sumElapsedTime << "s" << std::endl;
304 305 306 307 308 309 310
	}
	else
	{
		MPI_Send(&elapsedTime, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
	}
	MPI_Barrier(MPI_COMM_WORLD);
}