diff --git a/Sources/Core/FFmmAlgorithm.hpp b/Sources/Core/FFmmAlgorithm.hpp index d0ab240c58bd4dfbfa05aaa1bdedf3e2685e2eda..bbe1380acc16cd9eac21988e6083f23356ce9422 100644 --- a/Sources/Core/FFmmAlgorithm.hpp +++ b/Sources/Core/FFmmAlgorithm.hpp @@ -1,5 +1,5 @@ -#ifndef FFmmALGORITHM_HPP -#define FFmmALGORITHM_HPP +#ifndef FFMMALGORITHM_HPP +#define FFMMALGORITHM_HPP // /!\ Please, you must read the license at the bottom of this page #include "../Utils/FAssertable.hpp" @@ -232,6 +232,6 @@ public: }; -#endif //FFmmALGORITHM_HPP +#endif //FFMMALGORITHM_HPP // [--LICENSE--] diff --git a/Sources/Core/FFmmAlgorithmArray.hpp b/Sources/Core/FFmmAlgorithmThread.hpp similarity index 97% rename from Sources/Core/FFmmAlgorithmArray.hpp rename to Sources/Core/FFmmAlgorithmThread.hpp index 5d0bc35694b8be701133e6dde95a42760ba73d69..0f35610cd7bc1175371a12969021b1997cf244a2 100644 --- a/Sources/Core/FFmmAlgorithmArray.hpp +++ b/Sources/Core/FFmmAlgorithmThread.hpp @@ -1,5 +1,5 @@ -#ifndef FFmmALGORITHMARRAY_HPP -#define FFmmALGORITHMARRAY_HPP +#ifndef FFMMALGORITHMTHREAD_HPP +#define FFMMALGORITHMTHREAD_HPP // /!\ Please, you must read the license at the bottom of this page #include "../Utils/FAssertable.hpp" @@ -15,7 +15,7 @@ /** * @author Berenger Bramas (berenger.bramas@inria.fr) -* @class FFmmAlgorithmArray +* @class FFmmAlgorithmThread * @brief * Please read the license * @@ -33,7 +33,7 @@ template<template< class ParticleClass, class CellClass, int OctreeHeight> class class ParticleClass, class CellClass, template<class ParticleClass> class LeafClass, int OctreeHeight, int SubtreeHeight> -class FFmmAlgorithmArray : protected FAssertable{ +class FFmmAlgorithmThread : protected FAssertable{ // To reduce the size of variable type based on foctree in this file typedef FOctree<ParticleClass, CellClass, LeafClass, OctreeHeight, SubtreeHeight> Octree; typedef typename FOctree<ParticleClass, CellClass,LeafClass, OctreeHeight, SubtreeHeight>::Iterator OctreeIterator; @@ -56,7 +56,7 @@ public: * @param inKernels the kernels to call * An assert is launched if one of the arguments is null */ - FFmmAlgorithmArray(Octree* const inTree, Kernel* const inKernels) + FFmmAlgorithmThread(Octree* const inTree, Kernel* const inKernels) : tree(inTree) , iterArray(0) { assert(tree, "tree cannot be null", __LINE__, __FILE__); @@ -66,11 +66,11 @@ public: this->kernels[idxThread] = new KernelClass<ParticleClass, CellClass, OctreeHeight>(*inKernels); } - FDEBUG(FDebug::Controller << "FFmmAlgorithmArray\n"); + FDEBUG(FDebug::Controller << "FFmmAlgorithmThread\n"); } /** Default destructor */ - virtual ~FFmmAlgorithmArray(){ + virtual ~FFmmAlgorithmThread(){ for(int idxThread = 0 ; idxThread < FThreadNumbers ; ++idxThread){ delete this->kernels[idxThread]; } @@ -347,6 +347,6 @@ public: }; -#endif //FFmmALGORITHMARRAY_HPP +#endif //FFMMALGORITHMTHREAD_HPP // [--LICENSE--] diff --git a/Sources/Core/FFmmAlgorithmThreadProc.hpp b/Sources/Core/FFmmAlgorithmThreadProc.hpp new file mode 100644 index 0000000000000000000000000000000000000000..565f31ba89b9ef095c97fc0e8c1d46a834435fd7 --- /dev/null +++ b/Sources/Core/FFmmAlgorithmThreadProc.hpp @@ -0,0 +1,572 @@ +#ifndef FFMMALGORITHMTHREADPROC_HPP +#define FFMMALGORITHMTHREADPROC_HPP +// /!\ Please, you must read the license at the bottom of this page + +#include "../Utils/FAssertable.hpp" +#include "../Utils/FDebug.hpp" +#include "../Utils/FTrace.hpp" +#include "../Utils/FTic.hpp" +#include "../Utils/FGlobal.hpp" + +#include "../Containers/FOctree.hpp" + + +//================================================================================================ +#ifdef FUSE_MPI +// Compile by mpic++ testApplication.cpp ../Utils/FAssertable.cpp -o testApplication.exe +// run by mpirun -np 4 ./testApplication.exe +#include "../Utils/FMpiApplication.hpp" +typedef FMpiApplication ApplicationImplementation; +#else +// Compile by g++ testApplication.cpp ../Utils/FAssertable.cpp -o testApplication.exe +#include "../Utils/FSingleApplication.hpp" +typedef FSingleApplication ApplicationImplementation; +#endif +//================================================================================================ + +#include <omp.h> + +/** +* @author Berenger Bramas (berenger.bramas@inria.fr) +* @class FFmmAlgorithmThreadProc +* @brief +* Please read the license +* +* This class is a threaded FMM algorithm +* It just iterates on a tree and call the kernels with good arguments. +* It used the inspector-executor model : +* iterates on the tree and builds an array to work in parallel on this array +* +* Of course this class does not deallocate pointer given in arguements. +* +* Threaded & based on the inspector-executor model +* schedule(runtime) +*/ +template<template< class ParticleClass, class CellClass, int OctreeHeight> class KernelClass, + class ParticleClass, class CellClass, + template<class ParticleClass> class LeafClass, + int OctreeHeight, int SubtreeHeight> +class FFmmAlgorithmThreadProc : protected FAssertable, protected ApplicationImplementation{ + // To reduce the size of variable type based on foctree in this file + typedef FOctree<ParticleClass, CellClass, LeafClass, OctreeHeight, SubtreeHeight> Octree; + typedef typename FOctree<ParticleClass, CellClass,LeafClass, OctreeHeight, SubtreeHeight>::Iterator OctreeIterator; + typedef KernelClass<ParticleClass, CellClass, OctreeHeight> Kernel; + + Octree* const tree; //< The octree to work on + Kernel* kernels[FThreadNumbers]; //< The kernels + + FDEBUG(FTic counterTime); //< In case of debug: to count the elapsed time + FDEBUG(FTic computationCounter); //< In case of debug: to count computation time + + OctreeIterator* iterArray; + + static const int SizeShape = 3*3*3; + int shapeLeaf[SizeShape]; + + void run(){} + +public: + /** The constructor need the octree and the kernels used for computation + * @param inTree the octree to work on + * @param inKernels the kernels to call + * An assert is launched if one of the arguments is null + */ + FFmmAlgorithmThreadProc(Octree* const inTree, Kernel* const inKernels, const int inArgc, char ** const inArgv ) + : ApplicationImplementation(inArgc,inArgv), tree(inTree) , iterArray(0) { + + assert(tree, "tree cannot be null", __LINE__, __FILE__); + assert(kernels, "kernels cannot be null", __LINE__, __FILE__); + + for(int idxThread = 0 ; idxThread < FThreadNumbers ; ++idxThread){ + this->kernels[idxThread] = new KernelClass<ParticleClass, CellClass, OctreeHeight>(*inKernels); + } + + FDEBUG(FDebug::Controller << "FFmmAlgorithmThreadProc\n"); + } + + /** Default destructor */ + virtual ~FFmmAlgorithmThreadProc(){ + for(int idxThread = 0 ; idxThread < FThreadNumbers ; ++idxThread){ + delete this->kernels[idxThread]; + } + } + + /** + * To execute the fmm algorithm + * Call this function to run the complete algorithm + */ + void execute(){ + FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) ); + + for(int idxShape = 0 ; idxShape < SizeShape ; ++idxShape){ + this->shapeLeaf[idxShape] = 0; + } + const int LeafIndex = OctreeHeight - 1; + + // Count leaf + int leafs = 0; + OctreeIterator octreeIterator(tree); + octreeIterator.gotoBottomLeft(); + do{ + ++leafs; + const MortonIndex index = octreeIterator.getCurrentGlobalIndex(); + FTreeCoordinate coord; + coord.setPositionFromMorton(index, LeafIndex); + ++this->shapeLeaf[(coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3)]; + + } while(octreeIterator.moveRight()); + iterArray = new OctreeIterator[leafs]; + assert(iterArray, "iterArray bad alloc", __LINE__, __FILE__); + + for(int idxThread = 0 ; idxThread < FThreadNumbers ; ++idxThread){ + this->kernels[idxThread]->init(); + } + + bottomPass(); + upwardPass(); + + downardPass(); + + directPass(); + + delete [] iterArray; + iterArray = 0; + + FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) ); + } + + /** P2M */ + void bottomPass(){ + FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) ); + FDEBUG( FDebug::Controller.write("\tStart Bottom Pass\n").write(FDebug::Flush) ); + FDEBUG( counterTime.tic() ); + + OctreeIterator octreeIterator(tree); + const int nbProcess = processCount(); + const int idPorcess = processId(); + int leafs = 0; + // Iterate on leafs + octreeIterator.gotoBottomLeft(); + do{ + iterArray[leafs] = octreeIterator; + ++leafs; + } while(octreeIterator.moveRight()); + + const float stepIdx = float(leafs) / nbProcess; + const int startIdx = idPorcess*stepIdx; + const int endIdx = (idPorcess+1)*stepIdx; + + FDEBUG(computationCounter.tic()); + #pragma omp parallel num_threads(FThreadNumbers) + { + Kernel * const myThreadkernels = kernels[omp_get_thread_num()]; + #pragma omp for + for(int idxLeafs = startIdx ; idxLeafs < endIdx ; ++idxLeafs){ + // We need the current cell that represent the leaf + // and the list of particles + myThreadkernels->P2M( iterArray[idxLeafs].getCurrentCell() , iterArray[idxLeafs].getCurrentListSources()); + } + + #pragma omp single nowait + { + for(int idxLeafs = startIdx ; idxLeafs < endIdx ; ++idxLeafs){ + for(int idxProc = 0 ; idxProc < nbProcess ; ++idxProc){ + if(idxProc != idPorcess){ + sendData(idxProc,sizeof(CellClass),iterArray[idxLeafs].getCurrentCell(),idxLeafs); + } + } + } + } + + #pragma omp single nowait + { + int needToReceive = leafs - (endIdx-startIdx); + CellClass tempCell; + int source = 0, tag = 0, filled = 0; + + while(needToReceive){ + receiveData(sizeof(CellClass),&tempCell,&source,&tag,&filled); + if(filled){ + *iterArray[tag].getCurrentCell() = tempCell; + } + --needToReceive; + } + } + } + FDEBUG(computationCounter.tac()); + + processBarrier(); + + FDEBUG( counterTime.tac() ); + FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" ); + FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.elapsed() << " s\n" ); + FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) ); + } + + /** M2M */ + void upwardPass(){ + FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) ); + FDEBUG( FDebug::Controller.write("\tStart Upward Pass\n").write(FDebug::Flush); ); + FDEBUG( counterTime.tic() ); + FDEBUG( double totalComputation = 0 ); + + // Start from leal level - 1 + OctreeIterator octreeIterator(tree); + octreeIterator.gotoBottomLeft(); + octreeIterator.moveUp(); + OctreeIterator avoidGotoLeftIterator(octreeIterator); + + const int nbProcess = processCount(); + const int idPorcess = processId(); + + // for each levels + for(int idxLevel = OctreeHeight - 2 ; idxLevel > 1 ; --idxLevel ){ + int leafs = 0; + // for each cells + do{ + iterArray[leafs] = octreeIterator; + ++leafs; + } while(octreeIterator.moveRight()); + avoidGotoLeftIterator.moveUp(); + octreeIterator = avoidGotoLeftIterator;// equal octreeIterator.moveUp(); octreeIterator.gotoLeft(); + + + const float stepIdx = float(leafs) / nbProcess; + const int startIdx = idPorcess*stepIdx; + const int endIdx = (idPorcess+1)*stepIdx; + + FDEBUG(computationCounter.tic()); + + #pragma omp parallel num_threads(FThreadNumbers) + { + Kernel * const myThreadkernels = kernels[omp_get_thread_num()]; + #pragma omp for + for(int idxLeafs = startIdx ; idxLeafs < endIdx ; ++idxLeafs){ + myThreadkernels->M2M( iterArray[idxLeafs].getCurrentCell() , iterArray[idxLeafs].getCurrentChild(), idxLevel); + } + + #pragma omp single nowait + { + FDEBUG(computationCounter.tac()); + FDEBUG(totalComputation += computationCounter.elapsed()); + } + + // send computed data + #pragma omp single nowait + { + for(int idxLeafs = startIdx ; idxLeafs < endIdx ; ++idxLeafs){ + for(int idxProc = 0 ; idxProc < nbProcess ; ++idxProc){ + if(idxProc != idPorcess){ + sendData(idxProc,sizeof(CellClass),iterArray[idxLeafs].getCurrentCell(),idxLeafs); + } + } + } + } + + // received computed data + #pragma omp single nowait + { + int needToReceive = leafs - (endIdx-startIdx); + CellClass tempCell; + int source = 0, tag = 0, filled = 0; + + while(needToReceive){ + receiveData(sizeof(CellClass),&tempCell,&source,&tag,&filled); + if(filled){ + *iterArray[tag].getCurrentCell() = tempCell; + } + --needToReceive; + } + } + } + processBarrier(); + } + + FDEBUG( counterTime.tac() ); + FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" ); + FDEBUG( FDebug::Controller << "\t\t Computation : " << totalComputation << " s\n" ); + FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) ); + } + + /** M2L L2L */ + void downardPass(){ + FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) ); + FDEBUG( FDebug::Controller.write("\tStart Downward Pass (M2L)\n").write(FDebug::Flush); ); + FDEBUG( counterTime.tic() ); + FDEBUG( double totalComputation = 0 ); + + { // first M2L + OctreeIterator octreeIterator(tree); + octreeIterator.moveDown(); + OctreeIterator avoidGotoLeftIterator(octreeIterator); + + const int nbProcess = processCount(); + const int idPorcess = processId(); + + + // for each levels + for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){ + int leafs = 0; + // for each cells + do{ + iterArray[leafs] = octreeIterator; + ++leafs; + } while(octreeIterator.moveRight()); + avoidGotoLeftIterator.moveDown(); + octreeIterator = avoidGotoLeftIterator; + + const float stepIdx = float(leafs) / nbProcess; + const int startIdx = idPorcess*stepIdx; + const int endIdx = (idPorcess+1)*stepIdx; + + #pragma omp parallel num_threads(FThreadNumbers) + { + Kernel * const myThreadkernels = kernels[omp_get_thread_num()]; + CellClass* neighbors[208]; + + #pragma omp for + for(int idxLeafs = startIdx ; idxLeafs < endIdx ; ++idxLeafs){ + const int counter = tree->getDistantNeighbors(neighbors, iterArray[idxLeafs].getCurrentGlobalIndex(),idxLevel); + if(counter) myThreadkernels->M2L( iterArray[idxLeafs].getCurrentCell() , neighbors, counter, idxLevel); + } + + #pragma omp single nowait + { + for(int idxLeafs = startIdx ; idxLeafs < endIdx ; ++idxLeafs){ + for(int idxProc = 0 ; idxProc < nbProcess ; ++idxProc){ + if(idxProc != idPorcess){ + sendData(idxProc,sizeof(CellClass),iterArray[idxLeafs].getCurrentCell(),idxLeafs); + } + } + } + } + + // received computed data + #pragma omp single nowait + { + int needToReceive = leafs - (endIdx-startIdx); + CellClass tempCell; + int source = 0, tag = 0, filled = 0; + + while(needToReceive){ + receiveData(sizeof(CellClass),&tempCell,&source,&tag,&filled); + if(filled){ + *iterArray[tag].getCurrentCell() = tempCell; + } + --needToReceive; + } + } + } + processBarrier(); + } + } + FDEBUG( counterTime.tac() ); + FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" ); + FDEBUG( FDebug::Controller << "\t\t Computation : " << totalComputation << " s\n" ); + + FDEBUG( FDebug::Controller.write("\tStart Downward Pass (L2L)\n").write(FDebug::Flush); ); + FDEBUG( counterTime.tic() ); + FDEBUG( totalComputation = 0 ); + { // second L2L + OctreeIterator octreeIterator(tree); + octreeIterator.moveDown(); + + OctreeIterator avoidGotoLeftIterator(octreeIterator); + + const int heightMinusOne = OctreeHeight - 1; + const int nbProcess = processCount(); + const int idPorcess = processId(); + + // for each levels exepted leaf level + for(int idxLevel = 2 ; idxLevel < heightMinusOne ; ++idxLevel ){ + int leafs = 0; + // for each cells + do{ + iterArray[leafs] = octreeIterator; + ++leafs; + } while(octreeIterator.moveRight()); + avoidGotoLeftIterator.moveDown(); + octreeIterator = avoidGotoLeftIterator; + + const float stepIdx = float(leafs) / nbProcess; + const int startIdx = idPorcess*stepIdx; + const int endIdx = (idPorcess+1)*stepIdx; + + #pragma omp parallel num_threads(FThreadNumbers) + { + Kernel * const myThreadkernels = kernels[omp_get_thread_num()]; + #pragma omp for + for(int idxLeafs = startIdx ; idxLeafs < endIdx ; ++idxLeafs){ + myThreadkernels->L2L( iterArray[idxLeafs].getCurrentCell() , iterArray[idxLeafs].getCurrentChild(), idxLevel); + } + + #pragma omp single nowait + { + const int sizeBuffer = 8 * sizeof(CellClass); + char buffer[sizeBuffer]; + for(int idxLeafs = startIdx ; idxLeafs < endIdx ; ++idxLeafs){ + int position=0; + for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){ + if(iterArray[idxLeafs].getCurrentChild()[idxChild]){ + memcpy(&buffer[position*sizeof(CellClass)],iterArray[idxLeafs].getCurrentChild()[idxChild],sizeof(CellClass)); + ++position; + } + } + + for(int idxProc = 0 ; idxProc < nbProcess ; ++idxProc){ + if(idxProc != idPorcess){ + sendData(idxProc,position*sizeof(CellClass),buffer,idxLeafs); + } + } + } + } + + // received computed data + #pragma omp single nowait + { + int needToReceive = leafs - (endIdx-startIdx); + int source = 0, tag = 0, filled = 0; + const int sizeBuffer = 8 * sizeof(CellClass); + char buffer[sizeBuffer]; + + while(needToReceive){ + receiveData(sizeBuffer,buffer,&source,&tag,&filled); + if(filled){ + int position = 0; + for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){ + if(iterArray[tag].getCurrentChild()[idxChild]){ + memcpy(iterArray[tag].getCurrentChild()[idxChild],&buffer[position*sizeof(CellClass)],sizeof(CellClass)); + ++position; + } + } + } + --needToReceive; + } + } + } + processBarrier(); + } + } + + FDEBUG( counterTime.tac() ); + FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" ); + FDEBUG( FDebug::Controller << "\t\t Computation : " << totalComputation << " s\n" ); + FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) ); + } + + /** P2P */ + void directPass(){ + FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) ); + FDEBUG( FDebug::Controller.write("\tStart Direct Pass\n").write(FDebug::Flush); ); + FDEBUG( counterTime.tic() ); + + const int LeafIndex = OctreeHeight - 1; + int leafs = 0; + { + OctreeIterator octreeIterator(tree); + octreeIterator.gotoBottomLeft(); + // for each leafs + do{ + iterArray[leafs] = octreeIterator; + ++leafs; + } while(octreeIterator.moveRight()); + } + + FDEBUG(computationCounter.tic()); + + #pragma omp parallel num_threads(FThreadNumbers) + { + Kernel * const myThreadkernels = kernels[omp_get_thread_num()]; + // There is a maximum of 26 neighbors + FList<ParticleClass*>* neighbors[26]; + + const float stepIdx = float(leafs) / processCount(); + const int startIdx = processId()*stepIdx; + const int endIdx = (processId()+1)*stepIdx; + + #pragma omp for + for(int idxLeafs = startIdx ; idxLeafs < endIdx ; ++idxLeafs){ + myThreadkernels->L2P(iterArray[idxLeafs].getCurrentCell(), iterArray[idxLeafs].getCurrentListTargets()); + // need the current particles and neighbors particles + const int counter = tree->getLeafsNeighbors(neighbors, iterArray[idxLeafs].getCurrentGlobalIndex(),LeafIndex); + myThreadkernels->P2P( iterArray[idxLeafs].getCurrentListTargets(), iterArray[idxLeafs].getCurrentListSources() , neighbors, counter); + } + } + FDEBUG(computationCounter.tac()); + + FDEBUG( counterTime.tac() ); + FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" ); + FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.elapsed() << " s\n" ); + FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) ); + } + + /** This function test the octree to be sure that the fmm algorithm + * has worked completly. + */ + void ValidateFMMAlgoProc(){ + std::cout << "Check Result\n"; + int NbPart = 0; + int NbLeafs = 0; + { // Check that each particle has been summed with all other + OctreeIterator octreeIterator(tree); + octreeIterator.gotoBottomLeft(); + do{ + NbPart += octreeIterator.getCurrentListSources()->getSize(); + ++NbLeafs; + } while(octreeIterator.moveRight()); + } + { + const float stepIdx = float(NbLeafs) / processCount(); + const int startIdx = processId()*stepIdx; + const int endIdx = (processId()+1)*stepIdx; + // Check that each particle has been summed with all other + OctreeIterator octreeIterator(tree); + octreeIterator.gotoBottomLeft(); + + for(int idx = 0 ; idx < startIdx ; ++idx){ + octreeIterator.moveRight(); + } + + for(int idx = startIdx ; idx < endIdx ; ++idx){ + typename FList<ParticleClass*>::BasicIterator iter(*octreeIterator.getCurrentListTargets()); + + const bool isUsingTsm = (octreeIterator.getCurrentListTargets() != octreeIterator.getCurrentListSources()); + + while( iter.isValide() ){ + // If a particles has been impacted by less than NbPart - 1 (the current particle) + // there is a problem + if( (!isUsingTsm && iter.value()->getDataDown() != NbPart - 1) || + (isUsingTsm && iter.value()->getDataDown() != NbPart) ){ + std::cout << "Problem L2P + P2P, value on particle is : " << iter.value()->getDataDown() << "\n"; + } + iter.progress(); + } + octreeIterator.moveRight(); + } + } + + std::cout << "Done\n"; + } + + void print(){ + OctreeIterator octreeIterator(tree); + for(int idxLevel = OctreeHeight - 1 ; idxLevel > 1 ; --idxLevel ){ + do{ + std::cout << "[" << octreeIterator.getCurrentGlobalIndex() << "] up:" << octreeIterator.getCurrentCell()->getDataUp() << " down:" << octreeIterator.getCurrentCell()->getDataDown() << "\t"; + } while(octreeIterator.moveRight()); + std::cout << "\n"; + octreeIterator.gotoLeft(); + octreeIterator.moveDown(); + } + } + +}; + + + + + + +#endif //FFMMALGORITHMTHREAD_HPP + +// [--LICENSE--] diff --git a/Sources/Core/FFmmAlgorithmArrayTsm.hpp b/Sources/Core/FFmmAlgorithmThreadTsm.hpp similarity index 97% rename from Sources/Core/FFmmAlgorithmArrayTsm.hpp rename to Sources/Core/FFmmAlgorithmThreadTsm.hpp index b875d582605504d28b58b8ad835bd57aec937781..c420048f8a09e95b8b23f8f000b516370916f900 100644 --- a/Sources/Core/FFmmAlgorithmArrayTsm.hpp +++ b/Sources/Core/FFmmAlgorithmThreadTsm.hpp @@ -1,5 +1,5 @@ -#ifndef FFmmALGORITHMARRAYTSM_HPP -#define FFmmALGORITHMARRAYTSM_HPP +#ifndef FFMMALGORITHMTHREADTSM_HPP +#define FFMMALGORITHMTHREADTSM_HPP // /!\ Please, you must read the license at the bottom of this page #include "../Utils/FAssertable.hpp" @@ -15,7 +15,7 @@ /** * @author Berenger Bramas (berenger.bramas@inria.fr) -* @class FFmmAlgorithmArrayTsm +* @class FFmmAlgorithmThreadTsm * @brief * Please read the license * @@ -33,7 +33,7 @@ template<template< class ParticleClass, class CellClass, int OctreeHeight> class class ParticleClass, class CellClass, template<class ParticleClass> class LeafClass, int OctreeHeight, int SubtreeHeight> -class FFmmAlgorithmArrayTsm : protected FAssertable{ +class FFmmAlgorithmThreadTsm : protected FAssertable{ // To reduce the size of variable type based on foctree in this file typedef FOctree<ParticleClass, CellClass, LeafClass, OctreeHeight, SubtreeHeight> Octree; typedef typename FOctree<ParticleClass, CellClass,LeafClass, OctreeHeight, SubtreeHeight>::Iterator OctreeIterator; @@ -53,7 +53,7 @@ public: * @param inKernels the kernels to call * An assert is launched if one of the arguments is null */ - FFmmAlgorithmArrayTsm(Octree* const inTree, Kernel* const inKernels) + FFmmAlgorithmThreadTsm(Octree* const inTree, Kernel* const inKernels) : tree(inTree) , iterArray(0) { assert(tree, "tree cannot be null", __LINE__, __FILE__); @@ -63,11 +63,11 @@ public: this->kernels[idxThread] = new KernelClass<ParticleClass, CellClass, OctreeHeight>(*inKernels); } - FDEBUG(FDebug::Controller << "FFmmAlgorithmArrayTsm\n"); + FDEBUG(FDebug::Controller << "FFmmAlgorithmThreadTsm\n"); } /** Default destructor */ - virtual ~FFmmAlgorithmArrayTsm(){ + virtual ~FFmmAlgorithmThreadTsm(){ for(int idxThread = 0 ; idxThread < FThreadNumbers ; ++idxThread){ delete this->kernels[idxThread]; } @@ -362,6 +362,6 @@ public: }; -#endif //FFmmALGORITHMARRAYTSM_HPP +#endif //FFMMALGORITHMTHREADTSM_HPP // [--LICENSE--] diff --git a/Sources/Core/FFmmAlgorithmArrayUs.hpp b/Sources/Core/FFmmAlgorithmThreadUs.hpp similarity index 97% rename from Sources/Core/FFmmAlgorithmArrayUs.hpp rename to Sources/Core/FFmmAlgorithmThreadUs.hpp index 5aebd1d5406be133e01b5217aade280d76ad40e5..cc0d6dac9d938732f527519654311d255019738c 100644 --- a/Sources/Core/FFmmAlgorithmArrayUs.hpp +++ b/Sources/Core/FFmmAlgorithmThreadUs.hpp @@ -1,5 +1,5 @@ -#ifndef FFmmALGORITHMARRAYUS_HPP -#define FFmmALGORITHMARRAYUS_HPP +#ifndef FFMMALGORITHMTHREADUS_HPP +#define FFMMALGORITHMTHREADUS_HPP // /!\ Please, you must read the license at the bottom of this page #include "../Utils/FAssertable.hpp" @@ -15,7 +15,7 @@ /** * @author Berenger Bramas (berenger.bramas@inria.fr) -* @class FFmmAlgorithmArrayUs +* @class FFmmAlgorithmThreadUs * @brief * Please read the license * @@ -33,7 +33,7 @@ template<template< class ParticleClass, class CellClass, int OctreeHeight> class class ParticleClass, class CellClass, template<class ParticleClass> class LeafClass, int OctreeHeight, int SubtreeHeight> -class FFmmAlgorithmArrayUs : protected FAssertable{ +class FFmmAlgorithmThreadUs : protected FAssertable{ // To reduce the size of variable type based on foctree in this file typedef FOctree<ParticleClass, CellClass, LeafClass, OctreeHeight, SubtreeHeight> Octree; typedef typename FOctree<ParticleClass, CellClass,LeafClass, OctreeHeight, SubtreeHeight>::Iterator OctreeIterator; @@ -53,7 +53,7 @@ public: * @param inKernels the kernels to call * An assert is launched if one of the arguments is null */ - FFmmAlgorithmArrayUs(Octree* const inTree, Kernel* const inKernels) + FFmmAlgorithmThreadUs(Octree* const inTree, Kernel* const inKernels) : tree(inTree) , iterArray(0) { assert(tree, "tree cannot be null", __LINE__, __FILE__); @@ -63,11 +63,11 @@ public: this->kernels[idxThread] = new KernelClass<ParticleClass, CellClass, OctreeHeight>(*inKernels); } - FDEBUG(FDebug::Controller << "FFmmAlgorithmArrayUs\n"); + FDEBUG(FDebug::Controller << "FFmmAlgorithmThreadUs\n"); } /** Default destructor */ - virtual ~FFmmAlgorithmArrayUs(){ + virtual ~FFmmAlgorithmThreadUs(){ for(int idxThread = 0 ; idxThread < FThreadNumbers ; ++idxThread){ delete this->kernels[idxThread]; } @@ -313,6 +313,6 @@ public: }; -#endif //FFmmALGORITHMARRAYUS_HPP +#endif //FFMMALGORITHMTHREADUS_HPP // [--LICENSE--] diff --git a/Sources/Core/FFmmAlgorithmTsm.hpp b/Sources/Core/FFmmAlgorithmTsm.hpp index cb1fe7d9e13d5c9dddf09e2db55eb72156d7f21c..ac1ceacdc03dfbcede022ebea88e9739388a97ce 100644 --- a/Sources/Core/FFmmAlgorithmTsm.hpp +++ b/Sources/Core/FFmmAlgorithmTsm.hpp @@ -1,5 +1,5 @@ -#ifndef FFmmALGORITHMTSM_HPP -#define FFmmALGORITHMTSM_HPP +#ifndef FFMMALGORITHMTSM_HPP +#define FFMMALGORITHMTSM_HPP // /!\ Please, you must read the license at the bottom of this page #include "../Utils/FAssertable.hpp" @@ -283,6 +283,6 @@ public: }; -#endif //FFmmALGORITHMTSM_HPP +#endif //FFMMALGORITHMTSM_HPP // [--LICENSE--] diff --git a/Sources/Fmb/FFmbKernels.hpp b/Sources/Fmb/FFmbKernels.hpp index 787fe5a6ed53de6219d1a73bf736d8cab4d59592..9d738ef2147389a1e136a894a172953702a8d0d2 100644 --- a/Sources/Fmb/FFmbKernels.hpp +++ b/Sources/Fmb/FFmbKernels.hpp @@ -292,7 +292,7 @@ protected: cosSin[idxl].setReal( FMath::Sin(angle + FMath::FPiDiv2) ); cosSin[idxl].setImag( FMath::Sin(angle) ); - //printf("%d=%f/%f (%d/%f/%f)\n",idxl,cosSin[idxl].getReal(),cosSin[idxl].getImag(),idxl,inSphere.phi,this->PiArrayInner[idxlMod4]); + //printf("%d=%e/%e (%d/%e/%e)\n",idxl,cosSin[idxl].getReal(),cosSin[idxl].getImag(),idxl,inSphere.phi,this->PiArrayInner[idxlMod4]); } // p_associated_Legendre_function_Array @@ -301,7 +301,7 @@ protected: legendreFunction(FMB_Info_P,inSphere.cosTheta, inSphere.sinTheta, legendre); /*printf("FMB_Info_M2L_exp_size=%d\n",FMB_Info_M2L_exp_size); for(int temp = 0 ; temp < FMB_Info_M2L_exp_size ; ++temp){ - printf("%f\n",this->legendre[temp]); + printf("%e\n",this->legendre[temp]); }*/ FComplexe* currentResult = outResults; @@ -317,7 +317,7 @@ protected: currentResult->setImag( magnitude * cosSin[idxm].getImag() ); //printf("l = %d m = %d\n",idxl,idxm); - //printf("magnitude=%f idxRl=%f sphereHarmoInnerCoef=%f real=%f imag=%f\n",magnitude,idxRl,this->sphereHarmoInnerCoef[idxSphereHarmoCoef],currentResult->getReal(),currentResult->getImag()); + //printf("magnitude=%e idxRl=%e sphereHarmoInnerCoef=%e real=%e imag=%e\n",magnitude,idxRl,this->sphereHarmoInnerCoef[idxSphereHarmoCoef],currentResult->getReal(),currentResult->getImag()); } } @@ -333,7 +333,7 @@ protected: cosSin[idxl].setReal( FMath::Sin(angle + FMath::FPiDiv2) ); cosSin[idxl].setImag( FMath::Sin(angle) ); - //printf("l=%d \t inSphere.phi=%f \t this->PiArrayOuter[idxlMod4]=%f \t angle=%f \t FMath::Sin(angle + FMath::FPiDiv2)=%f \t FMath::Sin(angle)=%f\n", + //printf("l=%d \t inSphere.phi=%e \t this->PiArrayOuter[idxlMod4]=%e \t angle=%e \t FMath::Sin(angle + FMath::FPiDiv2)=%e \t FMath::Sin(angle)=%e\n", // idxl, inSphere.phi, this->PiArrayOuter[idxlMod4], angle, FMath::Sin(angle + FMath::FPiDiv2) , FMath::Sin(angle)); } @@ -352,8 +352,8 @@ protected: const FReal magnitude = this->sphereHarmoOuterCoef[idxl-idxm] * idxRl1 * legendre[idxLegendre]; currentResult->setReal( magnitude * cosSin[idxm].getReal() ); currentResult->setImag( magnitude * cosSin[idxm].getImag() ); - //printf("l=%d\t m=%d\t idxRl1=%f\t magnitude=%f\n",idxl,idxm,idxRl1,magnitude); - //printf("l=%d\t m=%d\t cosSin[idxm].getReal()=%f\t cosSin[idxm].getImag()=%f\n", + //printf("l=%d\t m=%d\t idxRl1=%e\t magnitude=%e\n",idxl,idxm,idxRl1,magnitude); + //printf("l=%d\t m=%d\t cosSin[idxm].getReal()=%e\t cosSin[idxm].getImag()=%e\n", // idxl,idxm,cosSin[idxm].getReal(),cosSin[idxm].getImag()); } } @@ -397,7 +397,7 @@ protected: // p_precomputed_cos_and_sin_array FComplexe cosSin[FMB_Info_M2L_P + 1]; - //printf("HarmoInnerTheta \t lmax = %d \t r = %f \t cos_theta = %f \t sin_theta = %f \t phi = %f\n", + //printf("HarmoInnerTheta \t lmax = %d \t r = %e \t cos_theta = %e \t sin_theta = %e \t phi = %e\n", // FMB_Info_P,inSphere.r,inSphere.cosTheta,inSphere.sinTheta,inSphere.phi); // Initialization of precomputed_cos_and_sin_array: @@ -407,7 +407,7 @@ protected: cosSin[idxm].setReal(FMath::Sin(angle + FMath::FPiDiv2)); cosSin[idxm].setImag(FMath::Sin(angle)); - //printf("l=%d \t inSphere.phi=%f \t this->PiArrayOuter[idxlMod4]=%f \t angle=%f \t FMath::Sin(angle + FMath::FPiDiv2)=%f \t FMath::Sin(angle)=%f\n", + //printf("l=%d \t inSphere.phi=%e \t this->PiArrayOuter[idxlMod4]=%e \t angle=%e \t FMath::Sin(angle + FMath::FPiDiv2)=%e \t FMath::Sin(angle)=%e\n", // idxm, inSphere.phi, this->PiArrayInner[idxmMod4], angle, FMath::Sin(angle + FMath::FPiDiv2) , FMath::Sin(angle)); } @@ -457,7 +457,7 @@ protected: p_theta_derivated_term->setReal(magnitude * cosSin[m].getReal()); p_theta_derivated_term->setImag(magnitude * cosSin[m].getImag()); - //printf("magnitude=%f r_l=%f p_spherical_harmonic_Inner_coefficients_array=%f real=%f imag=%f\n", + //printf("magnitude=%e r_l=%e p_spherical_harmonic_Inner_coefficients_array=%e real=%e imag=%e\n", // magnitude,r_l,*p_spherical_harmonic_Inner_coefficients_array,p_theta_derivated_term->getReal(),p_theta_derivated_term->getImag()); } @@ -485,7 +485,7 @@ protected: p_theta_derivated_term->setReal(magnitude * cosSin[m].getReal()); p_theta_derivated_term->setImag(magnitude * cosSin[m].getImag()); - //printf("magnitude=%f r_l=%f p_spherical_harmonic_Inner_coefficients_array=%f real=%f imag=%f\n", + //printf("magnitude=%e r_l=%e p_spherical_harmonic_Inner_coefficients_array=%e real=%e imag=%e\n", // magnitude,r_l,*p_spherical_harmonic_Inner_coefficients_array,p_theta_derivated_term->getReal(),p_theta_derivated_term->getImag()); ++p_term; @@ -533,7 +533,7 @@ protected: treeWidthAtLevel /= 2; //std::cout << "[precomputeM2M]treeWidthAtLevel=" << treeWidthAtLevel << "\n"; - //printf("\tidxLevel=%d\tFather.x=%f\tFather.y=%f\tFather.z=%f\n",idxLevel,father.getX(),father.getY(),father.getZ()); + //printf("\tidxLevel=%d\tFather.x=%e\tFather.y=%e\tFather.z=%e\n",idxLevel,father.getX(),father.getY(),father.getZ()); for(int idxChild = 0 ; idxChild < 8 ; ++idxChild ){ FTreeCoordinate childBox; @@ -555,11 +555,11 @@ protected: harmonicInner(positionTsmphere(L2LVector),this->transitionL2L[idxLevel][idxChild]); - //printf("[M2M_vector]%d/%d = %f/%f/%f\n", idxLevel , idxChild , M2MVector.getX() , M2MVector.getY() , M2MVector.getZ() ); - //printf("[M2M_vectorSpherical]%d/%d = %f/%f/%f/%f\n", idxLevel , idxChild , sphericalM2M.r , sphericalM2M.cosTheta , sphericalM2M.sinTheta , sphericalM2M.phi ); + //printf("[M2M_vector]%d/%d = %e/%e/%e\n", idxLevel , idxChild , M2MVector.getX() , M2MVector.getY() , M2MVector.getZ() ); + //printf("[M2M_vectorSpherical]%d/%d = %e/%e/%e/%e\n", idxLevel , idxChild , sphericalM2M.r , sphericalM2M.cosTheta , sphericalM2M.sinTheta , sphericalM2M.phi ); //for(int idxExpSize = 0 ; idxExpSize < FMB_Info_exp_size ; ++idxExpSize){ //std::cout << "transitionL2L[" << idxLevel << "][" << idxChild << "][" << idxExpSize << "]=" << this->transitionL2L[idxLevel][idxChild][idxExpSize].getReal()<<"/"<<this->transitionL2L[idxLevel][idxChild][idxExpSize].getImag()<< "\n"; - //printf("transitionM2M[%d][%d][%d]=%f/%f\n", idxLevel , idxChild , idxExpSize , this->transitionM2M[idxLevel][idxChild][idxExpSize].getReal(),this->transitionM2M[idxLevel][idxChild][idxExpSize].getImag()); + //printf("transitionM2M[%d][%d][%d]=%e/%e\n", idxLevel , idxChild , idxExpSize , this->transitionM2M[idxLevel][idxChild][idxExpSize].getReal(),this->transitionM2M[idxLevel][idxChild][idxExpSize].getImag()); //} } } @@ -593,7 +593,7 @@ protected: //printf("transferM2L[%d][%d][%d][%d]\n", idxLevel, idxd1, idxd2, idxd3); harmonicOuter(positionTsmphere(relativePos),this->transferM2L[idxLevel][idxd1][idxd2][idxd3]); //for(int idxTemp = 0 ; idxTemp < this->FMB_Info_M2L_exp_size ; ++idxTemp){ - // printf("transferM2L[%d][%d][%d][%d][%d]=%f/%f\n", idxLevel, idxd1, idxd2, idxd3, idxTemp, this->transferM2L[idxLevel][idxd1][idxd2][idxd3][idxTemp].getReal(),this->transferM2L[idxLevel][idxd1][idxd2][idxd3][idxTemp].getImag()); + // printf("transferM2L[%d][%d][%d][%d][%d]=%e/%e\n", idxLevel, idxd1, idxd2, idxd3, idxTemp, this->transferM2L[idxLevel][idxd1][idxd2][idxd3][idxTemp].getReal(),this->transferM2L[idxLevel][idxd1][idxd2][idxd3][idxTemp].getImag()); //} //[Blas] harmonicOuter(spherical,tempComplexe); @@ -673,13 +673,13 @@ public: //std::cout << "Working on part " << iterParticle.value()->getPhysicalValue() << "\n"; //F3DPosition tempPos = iterParticle.value()->getPosition() - inPole->getPosition(); - //ok printf("\tpos_rel.x=%f\tpos_rel.y=%f\tpos_rel.z=%f\n",tempPos.getX(),tempPos.getY(),tempPos.getZ()); - //ok printf("\tp_center.x=%f\tp_center.y=%f\tp_center.z=%f\n",inPole->getPosition().getX(),inPole->getPosition().getY(),inPole->getPosition().getZ()); - //ok printf("\tbody.x=%f\tbody.y=%f\tbody.z=%f\n",iterParticle.value()->getPosition().getX(),iterParticle.value()->getPosition().getY(),iterParticle.value()->getPosition().getZ()); + //ok printf("\tpos_rel.x=%e\tpos_rel.y=%e\tpos_rel.z=%e\n",tempPos.getX(),tempPos.getY(),tempPos.getZ()); + //ok printf("\tp_center.x=%e\tp_center.y=%e\tp_center.z=%e\n",inPole->getPosition().getX(),inPole->getPosition().getY(),inPole->getPosition().getZ()); + //ok printf("\tbody.x=%e\tbody.y=%e\tbody.z=%e\n",iterParticle.value()->getPosition().getX(),iterParticle.value()->getPosition().getY(),iterParticle.value()->getPosition().getZ()); harmonicInner(positionTsmphere(iterParticle.value()->getPosition() - inPole->getPosition()),current_thread_Y); - //printf("\tr=%f\tcos_theta=%f\tsin_theta=%f\tphi=%f\n",spherical.r,spherical.cosTheta,spherical.sinTheta,spherical.phi); + //printf("\tr=%e\tcos_theta=%e\tsin_theta=%e\tphi=%e\n",spherical.r,spherical.cosTheta,spherical.sinTheta,spherical.phi); FComplexe* p_exp_term = inPole->getMultipole(); FComplexe* p_Y_term = current_thread_Y; @@ -690,7 +690,7 @@ public: for(int k = 0 ; k <= j ; ++k, ++p_Y_term, ++p_exp_term){ p_Y_term->mulRealAndImag( valueParticle * pow_of_minus_1_j ); (*p_exp_term) += (*p_Y_term); - //printf("\tj=%d\tk=%d\tp_exp_term.real=%f\tp_exp_term.imag=%f\tp_Y_term.real=%f\tp_Y_term.imag=%f\tpow_of_minus_1_j=%f\n", + //printf("\tj=%d\tk=%d\tp_exp_term.real=%e\tp_exp_term.imag=%e\tp_Y_term.real=%e\tp_Y_term.imag=%e\tpow_of_minus_1_j=%e\n", // j,k,(*p_exp_term).getReal(),(*p_exp_term).getImag(),(*p_Y_term).getReal(),(*p_Y_term).getImag(),pow_of_minus_1_j); } } @@ -767,9 +767,9 @@ public: ((p_src_exp_term->getReal() * p_Inner_term->getImag()) - (p_src_exp_term->getImag() * p_Inner_term->getReal()))); - //printf("\tp_src_exp_term->getReal()=%f\tp_src_exp_term->getImag()=%f\n", p_src_exp_term->getReal(),p_src_exp_term->getImag()); - //printf("\tp_Inner_term->getReal()=%f\tp_Inner_term->getImag()=%f\n", p_Inner_term->getReal(),p_Inner_term->getImag()); - //printf("\tn[%d]l[%d]j[%d]k[%d] = %f / %f\n",n,l,j,k,p_target_exp_term->getReal(),p_target_exp_term->getImag()); + //printf("\tp_src_exp_term->getReal()=%e\tp_src_exp_term->getImag()=%e\n", p_src_exp_term->getReal(),p_src_exp_term->getImag()); + //printf("\tp_Inner_term->getReal()=%e\tp_Inner_term->getImag()=%e\n", p_Inner_term->getReal(),p_Inner_term->getImag()); + //printf("\tn[%d]l[%d]j[%d]k[%d] = %e / %e\n",n,l,j,k,p_target_exp_term->getReal(),p_target_exp_term->getImag()); } // for k } // for j @@ -794,7 +794,7 @@ public: p_target_exp_term->incImag(pow_of_minus_1_for_k * pow_of_minus_1_for_l * ((p_src_exp_term->getImag() * p_Inner_term->getReal()) - (p_src_exp_term->getReal() * p_Inner_term->getImag()))); - //printf("\tn[%d]l[%d]j[%d]k[%d] = %f / %f\n",n,l,j,k,p_target_exp_term->getReal(),p_target_exp_term->getImag()); + //printf("\tn[%d]l[%d]j[%d]k[%d] = %e / %e\n",n,l,j,k,p_target_exp_term->getReal(),p_target_exp_term->getImag()); } // for k @@ -805,7 +805,7 @@ public: p_target_exp_term->incImag( (p_src_exp_term->getImag() * p_Inner_term->getReal()) + (p_src_exp_term->getReal() * p_Inner_term->getImag())); - //printf("\tn[%d]l[%d]j[%d]k[%d] = %f / %f\n",n,l,j,k,p_target_exp_term->getReal(),p_target_exp_term->getImag()); + //printf("\tn[%d]l[%d]j[%d]k[%d] = %e / %e\n",n,l,j,k,p_target_exp_term->getReal(),p_target_exp_term->getImag()); } // for k } // for j @@ -861,9 +861,9 @@ public: (coordCenter.getX()-coordNeighbors.getX()), (coordCenter.getY()-coordNeighbors.getY()), (coordCenter.getZ()-coordNeighbors.getZ()));*/ - /*printf("M2L_transfer[0]= %f/%f\n",M2L_transfer->getReal(),M2L_transfer->getImag()); - printf("M2L_transfer[1]= %f/%f\n",M2L_transfer[1].getReal(),M2L_transfer[1].getImag()); - printf("M2L_transfer[2]= %f/%f\n",M2L_transfer[2].getReal(),M2L_transfer[2].getImag());*/ + /*printf("M2L_transfer[0]= %e/%e\n",M2L_transfer->getReal(),M2L_transfer->getImag()); + printf("M2L_transfer[1]= %e/%e\n",M2L_transfer[1].getReal(),M2L_transfer[1].getImag()); + printf("M2L_transfer[2]= %e/%e\n",M2L_transfer[2].getReal(),M2L_transfer[2].getImag());*/ const FComplexe* const multipole_exp_src = distantNeighbors[idxSize]->getMultipole(); // L_j^k FComplexe* p_target_exp_term = pole->getLocal(); @@ -906,7 +906,7 @@ public: // p_target_exp_term->getReal(),p_target_exp_term->getImag()); // printf("\t p_src_exp_term->real = %lf \t p_src_exp_term->imag = %lf \n", // p_src_exp_term->getReal(),p_src_exp_term->getImag()); - // printf("\t p_Outer_term->real = %f \t p_Outer_term->imag = %f \n", + // printf("\t p_Outer_term->real = %e \t p_Outer_term->imag = %e \n", // p_Outer_term->getReal(),p_Outer_term->getImag()); //printf("p_Outer_term-M2L_transfer = %d\n", // p_Outer_term-M2L_transfer); @@ -923,7 +923,7 @@ public: // p_target_exp_term->getReal(),p_target_exp_term->getImag()); // printf("\t\t p_src_exp_term->real = %lf \t p_src_exp_term->imag = %lf \n", // p_src_exp_term->getReal(),p_src_exp_term->getImag()); - // printf("\t\t p_Outer_term->real = %f \t p_Outer_term->imag = %f \n", + // printf("\t\t p_Outer_term->real = %e \t p_Outer_term->imag = %e \n", // p_Outer_term->getReal(),p_Outer_term->getImag()); } @@ -938,7 +938,7 @@ public: // p_target_exp_term->getReal(),p_target_exp_term->getImag()); // printf("\t\t p_src_exp_term->real = %lf \t p_src_exp_term->imag = %lf \n", // p_src_exp_term->getReal(),p_src_exp_term->getImag()); - // printf("\t\t p_Outer_term->real = %f \t p_Outer_term->imag = %f \n", + // printf("\t\t p_Outer_term->real = %e \t p_Outer_term->imag = %e \n", // p_Outer_term->getReal(),p_Outer_term->getImag()); } //printf("\tj=%d\tk=%d\tn=%d\tl=%d\n",j,k,n,l); @@ -1052,7 +1052,7 @@ public: p_Inner_term->getReal(),p_Inner_term->getImag()); printf("\t\t p_target_exp_term->real = %lf \t p_target_exp_term->imag = %lf \n", p_target_exp_term->getReal(),p_target_exp_term->getImag()); - printf("\tj=%d\tk=%d\tn=%d\tl=%d\tpow_of_minus_1_for_k=%f\n",j,k,n,l,pow_of_minus_1_for_k); + printf("\tj=%d\tk=%d\tn=%d\tl=%d\tpow_of_minus_1_for_k=%e\n",j,k,n,l,pow_of_minus_1_for_k); printf("\tp_target_exp_term = %d\n",p_target_exp_term-local_exp_target);*/ } //printf("\tj=%d\tk=%d\tn=%d\tl=%d\n",j,k,n,l); @@ -1232,7 +1232,7 @@ public: } } - /*printf("[details] ph = %f , rh = %f , th = %f \n", + /*printf("[details] ph = %e , rh = %e , th = %e \n", ph,rh,th);*/ @@ -1241,7 +1241,7 @@ public: const FReal sin_theta = FMath::Sin(th); const FReal sin_phi = FMath::Sin(ph); - /*printf("[details] cos_theta = %f \t cos_phi = %f \t sin_theta = %f \t sin_phi = %f \n", + /*printf("[details] cos_theta = %e \t cos_phi = %e \t sin_theta = %e \t sin_phi = %e \n", cos_theta, cos_phi, sin_theta, sin_phi);*/ /*printf("[force_vector_in_local_base] x = %lf \t y = %lf \t z = %lf \n", force_vector_in_local_base.getX(),force_vector_in_local_base.getY(),force_vector_in_local_base.getZ());*/ @@ -1272,7 +1272,7 @@ public: force_vector_tmp *= iterTarget.value()->getPhysicalValue(); //#endif - /*printf("[force_vector_tmp] fx = %f \t fy = %f \t fz = %f \n", + /*printf("[force_vector_tmp] fx = %e \t fy = %e \t fz = %e \n", force_vector_tmp.getX(),force_vector_tmp.getY(),force_vector_tmp.getZ());*/ iterTarget.value()->setForces( iterTarget.value()->getForces() + force_vector_tmp ); @@ -1281,7 +1281,7 @@ public: expansion_Evaluate_local_with_Y_already_computed(local->getLocal(),&potential); iterTarget.value()->setPotential(potential); - /*printf("[END] fx = %f \t fy = %f \t fz = %f \n\n", + /*printf("[END] fx = %e \t fy = %e \t fz = %e \n\n", iterTarget.value()->getForces().getX(),iterTarget.value()->getForces().getY(),iterTarget.value()->getForces().getZ());*/ //printf("p_potential = %lf\n", potential); @@ -1302,7 +1302,7 @@ public: // k=0 (*p_Y_term) *= (*local_exp); result += p_Y_term->getReal(); - //printf("\t\t p_Y_term->real = %f p_Y_term->imag = %f \t local_exp->real = %f local_exp->imag = %f \n", + //printf("\t\t p_Y_term->real = %e p_Y_term->imag = %e \t local_exp->real = %e local_exp->imag = %e \n", // p_Y_term->getReal(), p_Y_term->getImag(), local_exp->getReal(), local_exp->getImag()); ++p_Y_term; ++local_exp; @@ -1311,7 +1311,7 @@ public: for (int k=1; k<=j ;++k, ++p_Y_term, ++local_exp){ (*p_Y_term) *= (*local_exp); result += 2 * p_Y_term->getReal(); - //printf("\t\t p_Y_term->real = %f p_Y_term->imag = %f \t local_exp->real = %f local_exp->imag = %f \n", + //printf("\t\t p_Y_term->real = %e p_Y_term->imag = %e \t local_exp->real = %e local_exp->imag = %e \n", // p_Y_term->getReal(), p_Y_term->getImag(), local_exp->getReal(), local_exp->getImag()); } } @@ -1367,9 +1367,9 @@ public: iterSameBox.progress(); } - //printf("x = %f \t y = %f \t z = %f \n",iterTarget.value()->getPosition().getX(),iterTarget.value()->getPosition().getY(),iterTarget.value()->getPosition().getZ()); - //printf("\t P2P fx = %f \t fy = %f \t fz = %f \n",iterTarget.value()->getForces().getX(),iterTarget.value()->getForces().getY(),iterTarget.value()->getForces().getZ()); - //printf("\t potential = %f \n",iterTarget.value()->getPotential()); + //printf("x = %e \t y = %e \t z = %e \n",iterTarget.value()->getPosition().getX(),iterTarget.value()->getPosition().getY(),iterTarget.value()->getPosition().getZ()); + //printf("\t P2P fx = %e \t fy = %e \t fz = %e \n",iterTarget.value()->getForces().getX(),iterTarget.value()->getForces().getY(),iterTarget.value()->getForces().getZ()); + //printf("\t potential = %e \n",iterTarget.value()->getPotential()); iterTarget.progress(); } @@ -1441,9 +1441,9 @@ public: iterSameBox.progress(); } - //printf("x = %f \t y = %f \t z = %f \n",iterTarget.value()->getPosition().getX(),iterTarget.value()->getPosition().getY(),iterTarget.value()->getPosition().getZ()); - //printf("\t P2P fx = %f \t fy = %f \t fz = %f \n",iterTarget.value()->getForces().getX(),iterTarget.value()->getForces().getY(),iterTarget.value()->getForces().getZ()); - //printf("\t potential = %f \n",iterTarget.value()->getPotential()); + //printf("x = %e \t y = %e \t z = %e \n",iterTarget.value()->getPosition().getX(),iterTarget.value()->getPosition().getY(),iterTarget.value()->getPosition().getZ()); + //printf("\t P2P fx = %e \t fy = %e \t fz = %e \n",iterTarget.value()->getForces().getX(),iterTarget.value()->getForces().getY(),iterTarget.value()->getForces().getZ()); + //printf("\t potential = %e \n",iterTarget.value()->getPotential()); iterTarget.progress(); } diff --git a/Sources/Utils/FAbstractApplication.hpp b/Sources/Utils/FAbstractApplication.hpp index 9fe628601d62e3346ef68bf5f6167869c17593c6..f12e9619fdc8272d418644dbcfd44f70f2c856a0 100644 --- a/Sources/Utils/FAbstractApplication.hpp +++ b/Sources/Utils/FAbstractApplication.hpp @@ -46,6 +46,16 @@ protected: */ virtual void initSlave(){} + /** + * Send data to another process + */ + virtual void sendData(const int inReceiver, const int inSize, void* const inData, const int inTag) = 0; + + /** + * Receive from any process + */ + virtual void receiveData(const int inSize, void* const inData, int* const inSource, int* const inTag, int* const inFilledSize) = 0; + private: /** Forbiden (private) default constructor */ diff --git a/Sources/Utils/FGlobal.hpp b/Sources/Utils/FGlobal.hpp index 778090340156444a242dca6eb7f21baa7f9431c2..d892edc2d2a12ad7d709de5ca5375bbd0a1d3813 100644 --- a/Sources/Utils/FGlobal.hpp +++ b/Sources/Utils/FGlobal.hpp @@ -29,7 +29,7 @@ // MPI /////////////////////////////////////////////////////// -//#define FUSE_MPI +#define FUSE_MPI /////////////////////////////////////////////////////// // Threads diff --git a/Sources/Utils/FMpiApplication.hpp b/Sources/Utils/FMpiApplication.hpp index f85ab09503f210ee28b8c0eb22a8be36cd3be30f..51ffcc49805c5327679227e279c83e10411e4839 100644 --- a/Sources/Utils/FMpiApplication.hpp +++ b/Sources/Utils/FMpiApplication.hpp @@ -28,6 +28,19 @@ protected: virtual void run() = 0; + void sendData(const int inReceiver, const int inSize, void* const inData, const int inTag){ + MPI_Request request; + MPI_Isend(inData, inSize, MPI_CHAR , inReceiver, inTag, MPI_COMM_WORLD, &request); + } + + void receiveData(const int inSize, void* const inData, int* const inSource, int* const inTag, int* const inFilledSize){ + MPI_Status status; + MPI_Recv(inData, inSize, MPI_CHAR, MPI_ANY_SOURCE, MPI_ANY_TAG,MPI_COMM_WORLD, &status); + *inSource = status.MPI_SOURCE; + *inTag = status.MPI_TAG; + MPI_Get_count(&status,MPI_CHAR,inFilledSize); + } + public: /** * Constructor diff --git a/Sources/Utils/FSingleApplication.hpp b/Sources/Utils/FSingleApplication.hpp index 808ad80220f81142909c37fff8ff8b27c849d516..489e1c8b4e696a5ba8f754275c8a4419207b38b0 100644 --- a/Sources/Utils/FSingleApplication.hpp +++ b/Sources/Utils/FSingleApplication.hpp @@ -29,6 +29,15 @@ protected: */ virtual void run() = 0; + void sendData(const int, const int, void* const, const int ){ + } + + + void receiveData(const int, void* const, int* const inSource, int* const inTag, int* const inFilledSize){ + *inSource = 0; + *inTag = 0; + *inFilledSize = 0; + } public: /** diff --git a/Tests/testFmbAlgorithm.cpp b/Tests/testFmbAlgorithm.cpp index b897104d1041109392bdcdb8df4a4651f7ed673f..2eb3160585972baa437aec641b87c537620dff2c 100644 --- a/Tests/testFmbAlgorithm.cpp +++ b/Tests/testFmbAlgorithm.cpp @@ -18,8 +18,8 @@ #include "../Sources/Fmb/FExtendFmbCell.hpp" #include "../Sources/Core/FFmmAlgorithm.hpp" -#include "../Sources/Core/FFmmAlgorithmArray.hpp" -#include "../Sources/Core/FFmmAlgorithmArrayUs.hpp" +#include "../Sources/Core/FFmmAlgorithmThread.hpp" +#include "../Sources/Core/FFmmAlgorithmThreadUs.hpp" #include "../Sources/Components/FSimpleLeaf.hpp" @@ -115,8 +115,8 @@ int main(int argc, char ** argv){ counter.tic(); FFmbKernels<FmbParticle, FmbCell, NbLevels> kernels(loader.getBoxWidth()); - //FFmmAlgorithm FFmmAlgorithmThreaded FFmmAlgorithmArray FFmmAlgorithmTask FFmmAlgorithmArrayUs - FFmmAlgorithmArrayUs<FFmbKernels, FmbParticle, FmbCell, FSimpleLeaf, NbLevels, SizeSubLevels> algo(&tree,&kernels); + //FFmmAlgorithm FFmmAlgorithmThreaded FFmmAlgorithmThread FFmmAlgorithmTask FFmmAlgorithmThreadUs + FFmmAlgorithmThreadUs<FFmbKernels, FmbParticle, FmbCell, FSimpleLeaf, NbLevels, SizeSubLevels> algo(&tree,&kernels); algo.execute(); counter.tac(); diff --git a/Tests/testFmbBlasAlgorithm.cpp b/Tests/testFmbBlasAlgorithm.cpp index cc8821bfbe1ad67ae6ff7849367e3e7edbf8a161..d985af9a5edb9875b26686e05690c1c479eef07c 100644 --- a/Tests/testFmbBlasAlgorithm.cpp +++ b/Tests/testFmbBlasAlgorithm.cpp @@ -18,7 +18,7 @@ #include "../Sources/Fmb/FExtendFmbCell.hpp" #include "../Sources/Core/FFmmAlgorithm.hpp" -#include "../Sources/Core/FFmmAlgorithmArray.hpp" +#include "../Sources/Core/FFmmAlgorithmThread.hpp" #include "../Sources/Components/FSimpleLeaf.hpp" @@ -118,7 +118,7 @@ int main(int argc, char ** argv){ //FFmbKernelsBlas FFmbKernels FFmbKernelsBlas<FmbParticle, FmbCell, NbLevels> kernels(loader.getBoxWidth()); - //FFmmAlgorithm FFmmAlgorithmThreaded FFmmAlgorithmArray FFmmAlgorithmTask + //FFmmAlgorithm FFmmAlgorithmThreaded FFmmAlgorithmThread FFmmAlgorithmTask FFmmAlgorithm<FFmbKernelsBlas, FmbParticle, FmbCell, FSimpleLeaf, NbLevels, SizeSubLevels> algo(&tree,&kernels); algo.execute(); diff --git a/Tests/testFmbTsmAlgorithm.cpp b/Tests/testFmbTsmAlgorithm.cpp index 74db55b355a70f41b268f46d7ad874bcc2d7dff7..02fd6b26f783f7e8cd45738893b6b88339e9e81f 100644 --- a/Tests/testFmbTsmAlgorithm.cpp +++ b/Tests/testFmbTsmAlgorithm.cpp @@ -20,7 +20,7 @@ #include "../Sources/Fmb/FExtendFmbCell.hpp" #include "../Sources/Core/FFmmAlgorithm.hpp" -#include "../Sources/Core/FFmmAlgorithmArray.hpp" +#include "../Sources/Core/FFmmAlgorithmThread.hpp" #include "../Sources/Components/FSimpleLeaf.hpp" @@ -116,7 +116,7 @@ int main(int argc, char ** argv){ counter.tic(); FFmbKernels<FmbParticle, FmbCell, NbLevels> kernels(loader.getBoxWidth()); - //FFmmAlgorithm FFmmAlgorithmThreaded FFmmAlgorithmArray FFmmAlgorithmTask + //FFmmAlgorithm FFmmAlgorithmThreaded FFmmAlgorithmThread FFmmAlgorithmTask FFmmAlgorithm<FFmbKernels, FmbParticle, FmbCell, FSimpleLeaf, NbLevels, SizeSubLevels> algo(&tree,&kernels); algo.execute(); diff --git a/Tests/testFmbTsmNoTsm.cpp b/Tests/testFmbTsmNoTsm.cpp index 37bf54fe97b27b7c69fb8c5d231802cba2269835..950064e47b9693a6ab1cf9fe65deb3f3ef481c7b 100644 --- a/Tests/testFmbTsmNoTsm.cpp +++ b/Tests/testFmbTsmNoTsm.cpp @@ -23,8 +23,8 @@ #include "../Sources/Core/FFmmAlgorithm.hpp" #include "../Sources/Core/FFmmAlgorithmTsm.hpp" -#include "../Sources/Core/FFmmAlgorithmArray.hpp" -#include "../Sources/Core/FFmmAlgorithmArrayTsm.hpp" +#include "../Sources/Core/FFmmAlgorithmThread.hpp" +#include "../Sources/Core/FFmmAlgorithmThreadTsm.hpp" #include "../Sources/Components/FSimpleLeaf.hpp" #include "../Sources/Components/FTypedLeaf.hpp" @@ -130,9 +130,9 @@ int main(int argc, char ** argv){ FFmbKernels<FmbParticle, FmbCell, NbLevels> kernels(BoxWidth); FFmbKernels<FmbParticleTyped, FmbCellTyped, NbLevels> kernelsTyped(BoxWidth); - //FFmmAlgorithm FFmmAlgorithmArray - FFmmAlgorithmArray<FFmbKernels, FmbParticle, FmbCell, FSimpleLeaf, NbLevels, SizeSubLevels> algo(&tree,&kernels); - FFmmAlgorithmArrayTsm<FFmbKernels, FmbParticleTyped, FmbCellTyped, FTypedLeaf, NbLevels, SizeSubLevels> algoTyped(&treeTyped,&kernelsTyped); + //FFmmAlgorithm FFmmAlgorithmThread + FFmmAlgorithmThread<FFmbKernels, FmbParticle, FmbCell, FSimpleLeaf, NbLevels, SizeSubLevels> algo(&tree,&kernels); + FFmmAlgorithmThreadTsm<FFmbKernels, FmbParticleTyped, FmbCellTyped, FTypedLeaf, NbLevels, SizeSubLevels> algoTyped(&treeTyped,&kernelsTyped); algo.execute(); algoTyped.execute(); diff --git a/Tests/testFmmAlgorithm.cpp b/Tests/testFmmAlgorithm.cpp index 3018665053b5099096ee0578794ce5c901c76a86..7013bb441c005039c3047a5c8fef5e1bdfadb3c6 100644 --- a/Tests/testFmmAlgorithm.cpp +++ b/Tests/testFmmAlgorithm.cpp @@ -19,7 +19,7 @@ #include "../Sources/Components/FTestKernels.hpp" #include "../Sources/Core/FFmmAlgorithm.hpp" -#include "../Sources/Core/FFmmAlgorithmArray.hpp" +#include "../Sources/Core/FFmmAlgorithmThread.hpp" #include "../Sources/Components/FBasicKernels.hpp" @@ -82,8 +82,8 @@ int main(int argc, char ** argv){ // FTestKernels FBasicKernels FTestKernels<FTestParticle, FTestCell, NbLevels> kernels; - //FFmmAlgorithm FFmmAlgorithmThreaded FFmmAlgorithmArray FFmmAlgorithmTask - FFmmAlgorithmArray<FTestKernels, FTestParticle, FTestCell, FSimpleLeaf, NbLevels, SizeSubLevels> algo(&tree,&kernels); + //FFmmAlgorithm FFmmAlgorithmThread + FFmmAlgorithmThread<FTestKernels, FTestParticle, FTestCell, FSimpleLeaf, NbLevels, SizeSubLevels> algo(&tree,&kernels); algo.execute(); counter.tac(); @@ -98,9 +98,6 @@ int main(int argc, char ** argv){ ////////////////////////////////////////////////////////////////////////////////// std::cout << "Deleting particles ..." << std::endl; counter.tic(); - for(long idxPart = 0 ; idxPart < NbPart ; ++idxPart){ - particles[idxPart].~FTestParticle(); - } delete [] particles; counter.tac(); std::cout << "Done " << "(" << counter.elapsed() << "s)." << std::endl; diff --git a/Tests/testFmmAlgorithmTsm.cpp b/Tests/testFmmAlgorithmTsm.cpp index 89d5dbf44bc9b705640f5f9e3d2e43bf73ca535a..13b7e30bdd671579723967e9b97c175e38d13634 100644 --- a/Tests/testFmmAlgorithmTsm.cpp +++ b/Tests/testFmmAlgorithmTsm.cpp @@ -22,7 +22,7 @@ #include "../Sources/Extenssions/FExtendCellType.hpp" #include "../Sources/Core/FFmmAlgorithmTsm.hpp" -#include "../Sources/Core/FFmmAlgorithmArrayTsm.hpp" +#include "../Sources/Core/FFmmAlgorithmThreadTsm.hpp" #include "../Sources/Components/FBasicKernels.hpp" @@ -90,8 +90,8 @@ int main(int argc, char ** argv){ // FTestKernels FBasicKernels FTestKernels<FTestParticleTsm, FTestCellTsm, NbLevels> kernels; - //FFmmAlgorithmTsm FFmmAlgorithmArrayTsm - FFmmAlgorithmArrayTsm<FTestKernels, FTestParticleTsm, FTestCellTsm, FTypedLeaf, NbLevels, SizeSubLevels> algo(&tree,&kernels); + //FFmmAlgorithmTsm FFmmAlgorithmThreadTsm + FFmmAlgorithmThreadTsm<FTestKernels, FTestParticleTsm, FTestCellTsm, FTypedLeaf, NbLevels, SizeSubLevels> algo(&tree,&kernels); algo.execute(); counter.tac();