Commit 8c1a3efd authored by berenger-bramas's avatar berenger-bramas
Browse files

1 - Clean some parts

2 - Add a task based fmm threaded algorithm.
    It uses the OpenMP task (but needs some test)
    Now there are 3 multi thread fmm algorithms
        Naive approach (iter + mutex)
        Task based approach
        Inspector-executor model (closest to fmb original approach)

git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/scalfmm/scalfmm/trunk@27 2616d619-271b-44dc-8df4-d4a8f33a7222
parent 038874df
......@@ -18,11 +18,14 @@
* @brief
* Please read the license
*
* This class is a basic FMM algorithm
* This class is a threaded FMM algorithm
* It just iterates on a tree and call the kernels with good arguments.
* It used the inspector-executor model :
* iterates on the tree and builds an array to work in parallel on this array
*
* Of course this class does not deallocate pointer given in arguements.
*
* Threaded & based on the inspector-executor model
* schedule(runtime)
*/
template<template< class ParticuleClass, class CellClass, int OctreeHeight> class KernelClass, class ParticuleClass, class CellClass, int OctreeHeight, int SubtreeHeight>
......
#ifndef FFMMALGORITHMTASK_HPP
#define FFMMALGORITHMTASK_HPP
// /!\ Please, you must read the license at the bottom of this page
#include "../Utils/FAssertable.hpp"
#include "../Utils/FDebug.hpp"
#include "../Utils/FTrace.hpp"
#include "../Utils/FTic.hpp"
#include "../Containers/FOctree.hpp"
/**
* @author Berenger Bramas (berenger.bramas@inria.fr)
* @class FFMMAlgorithmTask
* @brief
* Please read the license
*
* This class is a threaded FMM algorithm
* It just iterates on a tree and call the kernels with good arguments.
* It used openMP task to do so, each calculus is a task
*
* @warning This class does not work with intel compiler (segmentation fault)
*
* Of course this class does not deallocate pointer given in arguements.
*/
template<template< class ParticuleClass, class CellClass, int OctreeHeight> class KernelClass, class ParticuleClass, class CellClass, int OctreeHeight, int SubtreeHeight>
class FFMMAlgorithmTask : protected FAssertable{
// To reduce the size of variable type based on foctree in this file
typedef FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight> Octree;
typedef typename FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>::Iterator FOctreeIterator;
static const int NbThreads = 4; //< Number of threads (currently a static number)
Octree* const tree; //< The octree to work on
KernelClass<ParticuleClass, CellClass, OctreeHeight>* kernels[NbThreads]; //< The kernels
FDEBUG(FTic counter); //< In case of debug: to count the elapsed time
public:
/** The constructor need the octree and the kernels used for computation
* @param inTree the octree to work on
* @param inKernels the kernels to call
* An assert is launched if one of the arguments is null
*/
FFMMAlgorithmTask(Octree* const inTree, KernelClass<ParticuleClass,CellClass,OctreeHeight>* const inKernels)
: tree(inTree) {
assert(tree, "tree cannot be null", __LINE__, __FILE__);
assert(kernels, "kernels cannot be null", __LINE__, __FILE__);
for(int idxThread = 0 ; idxThread < NbThreads ; ++idxThread){
this->kernels[idxThread] = new KernelClass<ParticuleClass, CellClass, OctreeHeight>(*inKernels);
}
FDEBUG(FDebug::Controller << "FFMMAlgorithmTask\n");
}
/** Default destructor */
virtual ~FFMMAlgorithmTask(){
for(int idxThread = 0 ; idxThread < NbThreads ; ++idxThread){
delete this->kernels[idxThread];
}
}
/**
* To execute the fmm algorithm
* Call this function to run the complete algorithm
*/
void execute(){
FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
for(int idxThread = 0 ; idxThread < NbThreads ; ++idxThread){
this->kernels[idxThread]->init();
}
bottomPass();
upwardPass();
downardPass();
directPass();
FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
}
/** P2M */
void bottomPass(){
FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
FDEBUG( FDebug::Controller.write("\tStart Bottom Pass\n").write(FDebug::Flush) );
FDEBUG( counter.tic() );
#pragma omp parallel num_threads(NbThreads)
{
#pragma omp single
{
FOctreeIterator octreeIterator(tree);
// Iterate on leafs
octreeIterator.gotoBottomLeft();
do{
#pragma omp task //default(shared) //private(octreeIterator) //untied
{
// We need the current cell that represent the leaf
// and the list of particules
kernels[omp_get_thread_num()]->P2M( octreeIterator.getCurrentCell() , octreeIterator.getCurrentList());
}
} while(octreeIterator.moveRight());
}
}
FDEBUG( counter.tac() );
FDEBUG( FDebug::Controller << "\tFinished (" << counter.elapsed() << "s)\n" );
FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
}
/** M2M */
void upwardPass(){
FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
FDEBUG( FDebug::Controller.write("\tStart Upward Pass\n").write(FDebug::Flush); );
FDEBUG( counter.tic() );
#pragma omp parallel num_threads(NbThreads)
{
#pragma omp single
{
// Start from leal level - 1
FOctreeIterator octreeIterator(tree);
octreeIterator.gotoBottomLeft();
octreeIterator.moveUp();
FOctreeIterator avoidGotoLeftIterator(octreeIterator);
int idxLevel = 0;
// for each levels
for(idxLevel = OctreeHeight - 2 ; idxLevel > 1 ; --idxLevel ){
// for each cells
do{
// We need the current cell and the child
// child is an array (of 8 child) that may be null
#pragma omp task
{
kernels[omp_get_thread_num()]->M2M( octreeIterator.getCurrentCell() , octreeIterator.getCurrentChild(), idxLevel);
}
} while(octreeIterator.moveRight());
#pragma omp taskwait
avoidGotoLeftIterator.moveUp();
octreeIterator = avoidGotoLeftIterator;// equal octreeIterator.moveUp(); octreeIterator.gotoLeft();
}
}
}
FDEBUG( counter.tac() );
FDEBUG( FDebug::Controller << "\tFinished (" << counter.elapsed() << "s)\n" );
FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
}
/** M2L L2L */
void downardPass(){
FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
{ // first M2L
FDEBUG( FDebug::Controller.write("\tStart Downward Pass (M2L)\n").write(FDebug::Flush); );
FDEBUG( counter.tic() );
#pragma omp parallel num_threads(NbThreads)
{
#pragma omp single
{
FOctreeIterator octreeIterator(tree);
octreeIterator.moveDown();
FOctreeIterator avoidGotoLeftIterator(octreeIterator);
// for each levels
for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
// for each cells
do{
#pragma omp task
{
CellClass* neighbors[208];
const int counter = tree->getDistantNeighbors(neighbors, octreeIterator.getCurrentGlobalIndex(),idxLevel);
if(counter) kernels[omp_get_thread_num()]->M2L( octreeIterator.getCurrentCell() , neighbors, counter, idxLevel);
}
} while(octreeIterator.moveRight());
// not needed #pragma omp taskwait
// we can work on several levels at the same time
avoidGotoLeftIterator.moveDown();
octreeIterator = avoidGotoLeftIterator;
}
}
}
FDEBUG( counter.tac() );
FDEBUG( FDebug::Controller << "\tFinished (" << counter.elapsed() << "s)\n" );
}
{ // second L2L
FDEBUG( FDebug::Controller.write("\tStart Downward Pass (L2L)\n").write(FDebug::Flush); );
FDEBUG( counter.tic() );
#pragma omp parallel num_threads(NbThreads)
{
#pragma omp single
{
FOctreeIterator octreeIterator(tree);
octreeIterator.moveDown();
FOctreeIterator avoidGotoLeftIterator(octreeIterator);
const int heightMinusOne = OctreeHeight - 1;
// for each levels exepted leaf level
for(int idxLevel = 2 ; idxLevel < heightMinusOne ; ++idxLevel ){
// for each cells
do{
#pragma omp task
{
kernels[omp_get_thread_num()]->L2L( octreeIterator.getCurrentCell() , octreeIterator.getCurrentChild(), idxLevel);
}
} while(octreeIterator.moveRight());
#pragma omp taskwait
avoidGotoLeftIterator.moveDown();
octreeIterator = avoidGotoLeftIterator;
}
}
}
FDEBUG( counter.tac() );
FDEBUG( FDebug::Controller << "\tFinished (" << counter.elapsed() << "s)\n" );
}
FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
}
/** P2P */
void directPass(){
FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
FDEBUG( FDebug::Controller.write("\tStart Direct Pass\n").write(FDebug::Flush); );
FDEBUG( counter.tic() );
#pragma omp parallel num_threads(NbThreads)
{
#pragma omp single
{
const int heightMinusOne = OctreeHeight - 1;
FOctreeIterator octreeIterator(tree);
octreeIterator.gotoBottomLeft();
// for each leafs
do{
#pragma omp task
{
// There is a maximum of 26 neighbors
FList<ParticuleClass*>* neighbors[26];
kernels[omp_get_thread_num()]->L2P(octreeIterator.getCurrentCell(), octreeIterator.getCurrentList());
// need the current particules and neighbors particules
const int counter = tree->getLeafsNeighbors(neighbors, octreeIterator.getCurrentGlobalIndex(),heightMinusOne);
kernels[omp_get_thread_num()]->P2P( octreeIterator.getCurrentList() , neighbors, counter);
}
} while(octreeIterator.moveRight());
}
}
FDEBUG( counter.tac() );
FDEBUG( FDebug::Controller << "\tFinished (" << counter.elapsed() << "s)\n" );
FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
}
};
#endif //FFMMALGORITHM_HPP
// [--LICENSE--]
......@@ -6,7 +6,6 @@
#include "../Utils/FDebug.hpp"
#include "../Utils/FTrace.hpp"
#include "../Utils/FTic.hpp"
#include "../Utils/FOpenMPThread.hpp"
#include "../Containers/FOctree.hpp"
......@@ -18,8 +17,9 @@
* @class FFMMAlgorithmThreaded
* @brief
* Please read the license
* This is a parallel FMM Algorithm
* This is a threaded FMM Algorithm
* Most of your code is unchanged excepted that your kerneks must have a copy operator.
* This is a naive approach using a mutex to share the tree's iterator
*
* The parallel algorithm is simple, each thread is taking a value from the iterator (protected by a mutex)
*/
......
......@@ -226,13 +226,17 @@ protected:
//////////////////////////////////////////////////////////////////
// position_2_r_cos_th_sin_th_ph
void positionToSphere(const F3DPosition& inVector, Spherical* const FRestrict outSphere ){
Spherical positionToSphere(const F3DPosition& inVector){
const FReal x2y2 = (inVector.getX() * inVector.getX()) + (inVector.getY() * inVector.getY());
outSphere->r = FMath::Sqrt( x2y2 + (inVector.getZ() * inVector.getZ()));
outSphere->phi = FMath::Atan2(inVector.getY(),inVector.getX());
outSphere->cosTheta = inVector.getZ() / outSphere->r; // cos_th = z/r
outSphere->sinTheta = FMath::Sqrt(x2y2) / outSphere->r; // sin_th = sqrt(x^2 + y^2)/r
Spherical outSphere;
outSphere.r = FMath::Sqrt( x2y2 + (inVector.getZ() * inVector.getZ()));
outSphere.phi = FMath::Atan2(inVector.getY(),inVector.getX());
outSphere.cosTheta = inVector.getZ() / outSphere.r; // cos_th = z/r
outSphere.sinTheta = FMath::Sqrt(x2y2) / outSphere.r; // sin_th = sqrt(x^2 + y^2)/r
return outSphere;
}
// associated_Legendre_function_Fill_complete_array_of_values_for_cos
......@@ -276,6 +280,7 @@ protected:
}
// spherical_harmonic_Inner
//2.7 these
void harmonicInner(const Spherical& inSphere, FComplexe* const outResults){
FComplexe* ptrCosSin = this->cosSin;
......@@ -525,18 +530,16 @@ protected:
father.getY() - (treeWidthAtLevel * (1 + (childBox.getY() * 2))),
father.getZ() - (treeWidthAtLevel * (1 + (childBox.getZ() * 2)))
);
Spherical sphericalM2M;
positionToSphere(M2MVector,&sphericalM2M);
harmonicInner(sphericalM2M,this->transitionM2M[idxLevel][idxChild]);
harmonicInner(positionToSphere(M2MVector),this->transitionM2M[idxLevel][idxChild]);
const F3DPosition L2LVector (
(treeWidthAtLevel * (1 + (childBox.getX() * 2))) - father.getX(),
(treeWidthAtLevel * (1 + (childBox.getY() * 2))) - father.getY(),
(treeWidthAtLevel * (1 + (childBox.getZ() * 2))) - father.getZ()
);
Spherical sphericalL2L;
positionToSphere(L2LVector,&sphericalL2L);
harmonicInner(sphericalL2L,this->transitionL2L[idxLevel][idxChild]);
harmonicInner(positionToSphere(L2LVector),this->transitionL2L[idxLevel][idxChild]);
//printf("[M2M_vector]%d/%d = %f/%f/%f\n", idxLevel , idxChild , M2MVector.getX() , M2MVector.getY() , M2MVector.getZ() );
//printf("[M2M_vectorSpherical]%d/%d = %f/%f/%f/%f\n", idxLevel , idxChild , sphericalM2M.r , sphericalM2M.cosTheta , sphericalM2M.sinTheta , sphericalM2M.phi );
......@@ -572,11 +575,9 @@ protected:
if( ( x*x + y*y + z*z ) >= ( 3*FMB_Info_ws*FMB_Info_ws + 0.1 ) ){
const F3DPosition relativePos( x*treeWidthAtLevel , y*treeWidthAtLevel , z*treeWidthAtLevel );
Spherical spherical;
positionToSphere(relativePos,&spherical);
// Not blas so
//printf("transferM2L[%d][%d][%d][%d]\n", idxLevel, idxd1, idxd2, idxd3);
harmonicOuter(spherical,this->transferM2L[idxLevel][idxd1][idxd2][idxd3]);
harmonicOuter(positionToSphere(relativePos),this->transferM2L[idxLevel][idxd1][idxd2][idxd3]);
//for(int idxTemp = 0 ; idxTemp < this->FMB_Info_M2L_exp_size ; ++idxTemp){
// printf("transferM2L[%d][%d][%d][%d][%d]=%f/%f\n", idxLevel, idxd1, idxd2, idxd3, idxTemp, this->transferM2L[idxLevel][idxd1][idxd2][idxd3][idxTemp].getReal(),this->transferM2L[idxLevel][idxd1][idxd2][idxd3][idxTemp].getImag());
//}
......@@ -652,16 +653,14 @@ public:
for(typename FList<ParticuleClass*>::ConstBasicIterator iterParticule(*inParticules);
iterParticule.isValide() ; iterParticule.progress()){
Spherical spherical;
positionToSphere(iterParticule.value()->getPosition() - inPole->getPosition(), &spherical);
//std::cout << "Working on part " << iterParticule.value()->getPhysicalValue() << "\n";
F3DPosition tempPos = iterParticule.value()->getPosition() - inPole->getPosition();
//F3DPosition tempPos = iterParticule.value()->getPosition() - inPole->getPosition();
//ok printf("\tpos_rel.x=%f\tpos_rel.y=%f\tpos_rel.z=%f\n",tempPos.getX(),tempPos.getY(),tempPos.getZ());
//ok printf("\tp_center.x=%f\tp_center.y=%f\tp_center.z=%f\n",inPole->getPosition().getX(),inPole->getPosition().getY(),inPole->getPosition().getZ());
//ok printf("\tbody.x=%f\tbody.y=%f\tbody.z=%f\n",iterParticule.value()->getPosition().getX(),iterParticule.value()->getPosition().getY(),iterParticule.value()->getPosition().getZ());
harmonicInner(spherical,current_thread_Y);
harmonicInner(positionToSphere(iterParticule.value()->getPosition() - inPole->getPosition()),current_thread_Y);
//ok printf("\tr=%f\tcos_theta=%f\tsin_theta=%f\tphi=%f\n",spherical.r,spherical.cosTheta,spherical.sinTheta,spherical.phi);
......@@ -679,6 +678,7 @@ public:
}
}
}
FTRACE( FTrace::Controller.leaveFunction(FTrace::KERNELS) );
}
......
......@@ -43,9 +43,7 @@ class FFmbKernelsForces : public FAbstractFmbKernels<ParticuleClass,CellClass, T
F3DPosition force_vector_in_local_base;
typename FAbstractFmbKernels<ParticuleClass,CellClass,TreeHeight>::Spherical spherical;
positionToSphere( iterTarget.value()->getPosition() - local->getPosition(), &spherical );
spherical = positionToSphere( iterTarget.value()->getPosition() - local->getPosition());
harmonicInnerThetaDerivated( spherical, FAbstractFmbKernels<ParticuleClass,CellClass,TreeHeight>::current_thread_Y, FAbstractFmbKernels<ParticuleClass,CellClass,TreeHeight>::current_thread_Y_theta_derivated);
// The maximum degree used here will be P.
......
......@@ -36,9 +36,8 @@ public:
//printf("Morton %lld\n",local->getMortonIndex());
// expansion_Evaluate_local
typename FAbstractFmbKernels<ParticuleClass,CellClass,TreeHeight>::Spherical spherical;
positionToSphere( iterTarget.value()->getPosition() - local->getPosition(), &spherical );
harmonicInner( spherical, FAbstractFmbKernels<ParticuleClass,CellClass,TreeHeight>::current_thread_Y);
harmonicInner( positionToSphere( iterTarget.value()->getPosition() - local->getPosition()),
FAbstractFmbKernels<ParticuleClass,CellClass,TreeHeight>::current_thread_Y);
FReal potential;
expansion_Evaluate_local_with_Y_already_computed(local->getLocal(),&potential);
......
......@@ -38,8 +38,7 @@ public:
F3DPosition force_vector_in_local_base;
typename FAbstractFmbKernels<ParticuleClass,CellClass,TreeHeight>::Spherical spherical;
positionToSphere( iterTarget.value()->getPosition() - local->getPosition(), &spherical );
spherical = positionToSphere( iterTarget.value()->getPosition() - local->getPosition());
/*printf("\t\t bodies_it_Get_p_position(&it) x = %lf \t y = %lf \t z = %lf \n",
(iterTarget.value()->getPosition()).getX(),
......
......@@ -19,6 +19,9 @@
#include "../Sources/Core/FFMMAlgorithm.hpp"
#include "../Sources/Core/FFMMAlgorithmArray.hpp"
#include "../Sources/Core/FFMMAlgorithmThreaded.hpp"
#include "../Sources/Core/FFMMAlgorithmTask.hpp"
#include "../Sources/Core/FBasicKernels.hpp"
// Compile by : g++ testFMMAlgorithm.cpp ../Sources/Utils/FAssertable.cpp ../Sources/Utils/FDebug.cpp ../Sources/Utils/FTrace.cpp -lgomp -fopenmp -O2 -o testFMMAlgorithm.exe
......@@ -74,7 +77,7 @@ int main(int , char ** ){
// FTestKernels FBasicKernels
FTestKernels<FTestParticule, FTestCell, NbLevels> kernels;
//FFMMAlgorithm FFMMAlgorithmThreaded FFMMAlgorithmArray
//FFMMAlgorithm FFMMAlgorithmThreaded FFMMAlgorithmArray FFMMAlgorithmTask
FFMMAlgorithmArray<FTestKernels, FTestParticule, FTestCell, NbLevels, SizeSubLevels> algo(&tree,&kernels);
algo.execute();
......
......@@ -20,6 +20,7 @@
#include "../Sources/Core/FFMMAlgorithm.hpp"
#include "../Sources/Core/FFMMAlgorithmArray.hpp"
#include "../Sources/Core/FFMMAlgorithmThreaded.hpp"
#include "../Sources/Core/FFMMAlgorithmTask.hpp"
#include "../Sources/Fmb/FFmbKernelsPotentialForces.hpp"
#include "../Sources/Fmb/FFmbKernelsForces.hpp"
......@@ -28,6 +29,7 @@
#include "../Sources/Files/FFMALoader.hpp"
// With openmp : g++ testFmbAlgorithm.cpp ../Sources/Utils/FAssertable.cpp ../Sources/Utils/FDebug.cpp ../Sources/Utils/FTrace.cpp -lgomp -fopenmp -O2 -o testFmbAlgorithm.exe
// icpc -openmp -openmp-lib=compat testFmbAlgorithm.cpp ../Sources/Utils/FAssertable.cpp ../Sources/Utils/FDebug.cpp -O2 -o testFmbAlgorithm.exe
/** This program show an example of use of
* the fmm basic algo
......@@ -99,8 +101,8 @@ int main(int , char ** ){
//FFmbKernelsPotentialForces FFmbKernelsForces FFmbKernelsPotential
FFmbKernelsPotentialForces<FmbParticule, FmbCell, NbLevels> kernels(NbLevels,loader.getBoxWidth());
//FFMMAlgorithm FFMMAlgorithmThreaded FFMMAlgorithmArray
FFMMAlgorithm<FFmbKernelsPotentialForces, FmbParticule, FmbCell, NbLevels, SizeSubLevels> algo(&tree,&kernels);
//FFMMAlgorithm FFMMAlgorithmThreaded FFMMAlgorithmArray FFMMAlgorithmTask
FFMMAlgorithmTask<FFmbKernelsPotentialForces, FmbParticule, FmbCell, NbLevels, SizeSubLevels> algo(&tree,&kernels);
algo.execute();
counter.tac();
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment