Commit a894af66 authored by berenger-bramas's avatar berenger-bramas
Browse files

Add a system to enable P2P in one computation.

Need some tests.

git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/scalfmm/scalfmm/trunk@45 2616d619-271b-44dc-8df4-d4a8f33a7222
parent 14431534
......@@ -82,7 +82,7 @@ public:
* @param size the number of direct neighbors (the size of the array directNeighborsParticles)
*/
virtual void P2P(FList<ParticleClass*>* const FRestrict targets, const FList<ParticleClass*>* const FRestrict sources,
const FList<ParticleClass*>*const FRestrict *const FRestrict directNeighborsParticles, const int size) = 0;
FList<ParticleClass*>*const FRestrict *const FRestrict directNeighborsParticles, const int size) = 0;
};
......
......@@ -57,7 +57,7 @@ public:
/** Print the number of particles */
virtual void P2P(FList<ParticleClass*>* const FRestrict , const FList<ParticleClass*>* const FRestrict ,
const FList<ParticleClass*>* FRestrict const* FRestrict , const int ) {
FList<ParticleClass*>* FRestrict const* FRestrict , const int ) {
FTRACE( FTrace::Controller.enterFunction(FTrace::KERNELS, __FUNCTION__ , __FILE__ , __LINE__) );
FTRACE( FTrace::Controller.leaveFunction(FTrace::KERNELS) );
}
......
......@@ -78,7 +78,7 @@ public:
}
// After Downward
void P2P(FList<ParticleClass*>* const FRestrict targets, const FList<ParticleClass*>* const FRestrict sources,
const FList<ParticleClass*>* FRestrict const* FRestrict directNeighbors, const int size) {
FList<ParticleClass*>* FRestrict const* FRestrict directNeighbors, const int size) {
FTRACE( FTrace::Controller.enterFunction(FTrace::KERNELS, __FUNCTION__ , __FILE__ , __LINE__) );
// Each particles targeted is impacted by the particles sources
long inc = sources->getSize();
......
......@@ -47,7 +47,10 @@ class FFmmAlgorithmArray : protected FAssertable{
OctreeIterator* iterArray;
public:
static const int SizeShape = 3*3*3;
int shapeLeaf[SizeShape];
public:
/** The constructor need the octree and the kernels used for computation
* @param inTree the octree to work on
* @param inKernels the kernels to call
......@@ -80,12 +83,22 @@ public:
void execute(){
FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
for(int idxShape = 0 ; idxShape < SizeShape ; ++idxShape){
this->shapeLeaf[idxShape] = 0;
}
const int LeafIndex = OctreeHeight - 1;
// Count leaf
int leafs = 0;
OctreeIterator octreeIterator(tree);
octreeIterator.gotoBottomLeft();
do{
++leafs;
const MortonIndex index = octreeIterator.getCurrentGlobalIndex();
FTreeCoordinate coord;
coord.setPositionFromMorton(index, LeafIndex);
++this->shapeLeaf[(coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3)];
} while(octreeIterator.moveRight());
iterArray = new OctreeIterator[leafs];
assert(iterArray, "iterArray bad alloc", __LINE__, __FILE__);
......@@ -275,35 +288,56 @@ public:
FDEBUG( FDebug::Controller.write("\tStart Direct Pass\n").write(FDebug::Flush); );
FDEBUG( counterTime.tic() );
int leafs = 0;
OctreeIterator* shapeArray[SizeShape];
int countShape[SizeShape];
for(int idxShape = 0 ; idxShape < SizeShape ; ++idxShape){
shapeArray[idxShape] = new OctreeIterator[this->shapeLeaf[idxShape]];
countShape[idxShape] = 0;
}
const int LeafIndex = OctreeHeight - 1;
//int leafs = 0;
{
OctreeIterator octreeIterator(tree);
octreeIterator.gotoBottomLeft();
// for each leafs
do{
iterArray[leafs] = octreeIterator;
++leafs;
//iterArray[leafs] = octreeIterator;
//++leafs;
const MortonIndex index = octreeIterator.getCurrentGlobalIndex();
FTreeCoordinate coord;
coord.setPositionFromMorton(index, LeafIndex);
const int shapePosition = (coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3);
shapeArray[shapePosition][countShape[shapePosition]] = octreeIterator;
++countShape[shapePosition];
} while(octreeIterator.moveRight());
}
const int heightMinusOne = OctreeHeight - 1;
FDEBUG(computationCounter.tic());
#pragma omp parallel num_threads(FThreadNumbers)
{
Kernel * const myThreadkernels = kernels[omp_get_thread_num()];
// There is a maximum of 26 neighbors
FList<ParticleClass*>* neighbors[26];
for(int idxShape = 0 ; idxShape < SizeShape ; ++idxShape){
const int leafAtThisShape = this->shapeLeaf[idxShape];
#pragma omp parallel num_threads(FThreadNumbers)
{
Kernel * const myThreadkernels = kernels[omp_get_thread_num()];
// There is a maximum of 26 neighbors
FList<ParticleClass*>* neighbors[26];
#pragma omp for
for(int idxLeafs = 0 ; idxLeafs < leafs ; ++idxLeafs){
myThreadkernels->L2P(iterArray[idxLeafs].getCurrentCell(), iterArray[idxLeafs].getCurrentListTargets());
// need the current particles and neighbors particles
const int counter = tree->getLeafsNeighbors(neighbors, iterArray[idxLeafs].getCurrentGlobalIndex(),heightMinusOne);
myThreadkernels->P2P( iterArray[idxLeafs].getCurrentListTargets(), iterArray[idxLeafs].getCurrentListSources() , neighbors, counter);
#pragma omp for
for(int idxLeafs = 0 ; idxLeafs < leafAtThisShape ; ++idxLeafs){
myThreadkernels->L2P(shapeArray[idxShape][idxLeafs].getCurrentCell(), shapeArray[idxShape][idxLeafs].getCurrentListTargets());
// need the current particles and neighbors particles
const int counter = tree->getLeafsNeighbors(neighbors, shapeArray[idxShape][idxLeafs].getCurrentGlobalIndex(),LeafIndex);
myThreadkernels->P2P( shapeArray[idxShape][idxLeafs].getCurrentListTargets(), shapeArray[idxShape][idxLeafs].getCurrentListSources() , neighbors, counter);
}
}
}
FDEBUG(computationCounter.tac());
for(int idxShape = 0 ; idxShape < SizeShape ; ++idxShape){
delete [] shapeArray[idxShape];
}
FDEBUG( counterTime.tac() );
FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" );
FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.elapsed() << " s\n" );
......
#ifndef FFmmALGORITHMTASK_HPP
#define FFmmALGORITHMTASK_HPP
// /!\ Please, you must read the license at the bottom of this page
#include "../Utils/FAssertable.hpp"
#include "../Utils/FDebug.hpp"
#include "../Utils/FTrace.hpp"
#include "../Utils/FTic.hpp"
#include "../Utils/FGlobal.hpp"
#include "../Containers/FOctree.hpp"
/**
* @author Berenger Bramas (berenger.bramas@inria.fr)
* @class FFmmAlgorithmTask
* @brief
* Please read the license
*
* This class is a threaded FMM algorithm
* It just iterates on a tree and call the kernels with good arguments.
* It used openMP task to do so, each calculus is a task
*
* @warning This class does not work with intel compiler (segmentation fault)
*
* Of course this class does not deallocate pointer given in arguements.
*/
template<template< class ParticleClass, class CellClass, int OctreeHeight> class KernelClass,
class ParticleClass, class CellClass,
template<class ParticleClass> class LeafClass,
int OctreeHeight, int SubtreeHeight>
class FFmmAlgorithmTask : protected FAssertable{
// To reduce the size of variable type based on foctree in this file
typedef FOctree<ParticleClass, CellClass, LeafClass, OctreeHeight, SubtreeHeight> Octree;
typedef typename FOctree<ParticleClass, CellClass,LeafClass, OctreeHeight, SubtreeHeight>::Iterator FOctreeIterator;
Octree* const tree; //< The octree to work on
KernelClass<ParticleClass, CellClass, OctreeHeight>* kernels[FThreadNumbers]; //< The kernels
FDEBUG(FTic counterTime); //< In case of debug: to count the elapsed time
public:
/** The constructor need the octree and the kernels used for computation
* @param inTree the octree to work on
* @param inKernels the kernels to call
* An assert is launched if one of the arguments is null
*/
FFmmAlgorithmTask(Octree* const inTree, KernelClass<ParticleClass,CellClass,OctreeHeight>* const inKernels)
: tree(inTree) {
assert(tree, "tree cannot be null", __LINE__, __FILE__);
assert(kernels, "kernels cannot be null", __LINE__, __FILE__);
for(int idxThread = 0 ; idxThread < FThreadNumbers ; ++idxThread){
this->kernels[idxThread] = new KernelClass<ParticleClass, CellClass, OctreeHeight>(*inKernels);
}
FDEBUG(FDebug::Controller << "FFmmAlgorithmTask\n");
}
/** Default destructor */
virtual ~FFmmAlgorithmTask(){
for(int idxThread = 0 ; idxThread < FThreadNumbers ; ++idxThread){
delete this->kernels[idxThread];
}
}
/**
* To execute the fmm algorithm
* Call this function to run the complete algorithm
*/
void execute(){
FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
for(int idxThread = 0 ; idxThread < FThreadNumbers ; ++idxThread){
this->kernels[idxThread]->init();
}
bottomPass();
upwardPass();
downardPass();
directPass();
FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
}
/** P2M */
void bottomPass(){
FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
FDEBUG( FDebug::Controller.write("\tStart Bottom Pass\n").write(FDebug::Flush) );
FDEBUG( counterTime.tic() );
#pragma omp parallel num_threads(FThreadNumbers)
{
#pragma omp single
{
FOctreeIterator octreeIterator(tree);
// Iterate on leafs
octreeIterator.gotoBottomLeft();
do{
#pragma omp task //default(shared) //private(octreeIterator) //untied
{
// We need the current cell that represent the leaf
// and the list of particles
kernels[omp_get_thread_num()]->P2M( octreeIterator.getCurrentCell() , octreeIterator.getCurrentListSources());
}
} while(octreeIterator.moveRight());
}
}
FDEBUG( counterTime.tac() );
FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" );
FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
}
/** M2M */
void upwardPass(){
FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
FDEBUG( FDebug::Controller.write("\tStart Upward Pass\n").write(FDebug::Flush); );
FDEBUG( counterTime.tic() );
#pragma omp parallel num_threads(FThreadNumbers)
{
#pragma omp single
{
// Start from leal level - 1
FOctreeIterator octreeIterator(tree);
octreeIterator.gotoBottomLeft();
octreeIterator.moveUp();
FOctreeIterator avoidGotoLeftIterator(octreeIterator);
int idxLevel = 0;
// for each levels
for(idxLevel = OctreeHeight - 2 ; idxLevel > 1 ; --idxLevel ){
// for each cells
do{
// We need the current cell and the child
// child is an array (of 8 child) that may be null
#pragma omp task
{
kernels[omp_get_thread_num()]->M2M( octreeIterator.getCurrentCell() , octreeIterator.getCurrentChild(), idxLevel);
}
} while(octreeIterator.moveRight());
#pragma omp taskwait
avoidGotoLeftIterator.moveUp();
octreeIterator = avoidGotoLeftIterator;// equal octreeIterator.moveUp(); octreeIterator.gotoLeft();
}
}
}
FDEBUG( counterTime.tac() );
FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" );
FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
}
/** M2L L2L */
void downardPass(){
FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
{ // first M2L
FDEBUG( FDebug::Controller.write("\tStart Downward Pass (M2L)\n").write(FDebug::Flush); );
FDEBUG( counterTime.tic() );
#pragma omp parallel num_threads(FThreadNumbers)
{
#pragma omp single
{
FOctreeIterator octreeIterator(tree);
octreeIterator.moveDown();
FOctreeIterator avoidGotoLeftIterator(octreeIterator);
// for each levels
for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
// for each cells
do{
#pragma omp task
{
CellClass* neighbors[208];
const int counter = tree->getDistantNeighbors(neighbors, octreeIterator.getCurrentGlobalIndex(),idxLevel);
if(counter) kernels[omp_get_thread_num()]->M2L( octreeIterator.getCurrentCell() , neighbors, counter, idxLevel);
}
} while(octreeIterator.moveRight());
// not needed #pragma omp taskwait
// we can work on several levels at the same time
avoidGotoLeftIterator.moveDown();
octreeIterator = avoidGotoLeftIterator;
}
}
}
FDEBUG( counterTime.tac() );
FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" );
}
{ // second L2L
FDEBUG( FDebug::Controller.write("\tStart Downward Pass (L2L)\n").write(FDebug::Flush); );
FDEBUG( counterTime.tic() );
#pragma omp parallel num_threads(FThreadNumbers)
{
#pragma omp single
{
FOctreeIterator octreeIterator(tree);
octreeIterator.moveDown();
FOctreeIterator avoidGotoLeftIterator(octreeIterator);
const int heightMinusOne = OctreeHeight - 1;
// for each levels exepted leaf level
for(int idxLevel = 2 ; idxLevel < heightMinusOne ; ++idxLevel ){
// for each cells
do{
#pragma omp task
{
kernels[omp_get_thread_num()]->L2L( octreeIterator.getCurrentCell() , octreeIterator.getCurrentChild(), idxLevel);
}
} while(octreeIterator.moveRight());
#pragma omp taskwait
avoidGotoLeftIterator.moveDown();
octreeIterator = avoidGotoLeftIterator;
}
}
}
FDEBUG( counterTime.tac() );
FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" );
}
FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
}
/** P2P */
void directPass(){
FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
FDEBUG( FDebug::Controller.write("\tStart Direct Pass\n").write(FDebug::Flush); );
FDEBUG( counterTime.tic() );
#pragma omp parallel num_threads(FThreadNumbers)
{
#pragma omp single
{
const int heightMinusOne = OctreeHeight - 1;
FOctreeIterator octreeIterator(tree);
octreeIterator.gotoBottomLeft();
// for each leafs
do{
#pragma omp task
{
// There is a maximum of 26 neighbors
FList<ParticleClass*>* neighbors[26];
kernels[omp_get_thread_num()]->L2P(octreeIterator.getCurrentCell(), octreeIterator.getCurrentListTargets());
// need the current particles and neighbors particles
const int counter = tree->getLeafsNeighbors(neighbors, octreeIterator.getCurrentGlobalIndex(),heightMinusOne);
kernels[omp_get_thread_num()]->P2P( octreeIterator.getCurrentListTargets() , octreeIterator.getCurrentListSources(), neighbors, counter);
}
} while(octreeIterator.moveRight());
}
}
FDEBUG( counterTime.tac() );
FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" );
FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
}
};
#endif //FFmmALGORITHM_HPP
// [--LICENSE--]
#ifndef FFmmALGORITHMTHREADED_HPP
#define FFmmALGORITHMTHREADED_HPP
// /!\ Please, you must read the license at the bottom of this page
#include "../Utils/FAssertable.hpp"
#include "../Utils/FDebug.hpp"
#include "../Utils/FTrace.hpp"
#include "../Utils/FTic.hpp"
#include "../Utils/FGlobal.hpp"
#include "../Containers/FOctree.hpp"
#include <omp.h>
/**
* @author Berenger Bramas (berenger.bramas@inria.fr)
* @class FFmmAlgorithmThreaded
* @brief
* Please read the license
* This is a threaded FMM Algorithm
* Most of your code is unchanged excepted that your kerneks must have a copy operator.
* This is a naive approach using a mutex to share the tree's iterator
*
* The parallel algorithm is simple, each thread is taking a value from the iterator (protected by a mutex)
*/
template<template< class ParticleClass, class CellClass, int OctreeHeight> class KernelClass,
class ParticleClass, class CellClass,
template<class ParticleClass> class LeafClass,
int OctreeHeight, int SubtreeHeight>
class FFmmAlgorithmThreaded : protected FAssertable{
// To reduce the size of variable type based on foctree in this file
typedef FOctree<ParticleClass, CellClass, LeafClass, OctreeHeight, SubtreeHeight> Octree;
typedef typename FOctree<ParticleClass, CellClass, LeafClass, OctreeHeight, SubtreeHeight>::Iterator FOctreeIterator;
KernelClass<ParticleClass, CellClass, OctreeHeight>* kernels[FThreadNumbers]; //< The kernels (one by thread)
Octree* const tree; //< The octree to work on
FDEBUG(FTic counterTime); //< In case of debug count the time
public:
/** The constructor need the octree and the kernels used for computation
* @param inTree the octree
* @param inKernels the kernels
* an assert is launched if one of the arguments is null
*/
FFmmAlgorithmThreaded(Octree* const inTree,
KernelClass<ParticleClass, CellClass, OctreeHeight>* const inKernels)
: tree(inTree) {
assert(tree, "tree cannot be null", __LINE__, __FILE__);
assert(kernels, "kernels cannot be null", __LINE__, __FILE__);
for(int idxThread = 0 ; idxThread < FThreadNumbers ; ++idxThread){
this->kernels[idxThread] = new KernelClass<ParticleClass, CellClass, OctreeHeight>(*inKernels);
}
FDEBUG(FDebug::Controller << "FFmmAlgorithmThreaded\n" );
}
/** Default destructor */
virtual ~FFmmAlgorithmThreaded(){
for(int idxThread = 0 ; idxThread < FThreadNumbers ; ++idxThread){
delete this->kernels[idxThread];
}
}
/** To execute the fmm algorithm
* Call this function to run the complete algo
*/
void execute(){
FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
for(int idxThread = 0 ; idxThread < FThreadNumbers ; ++idxThread){
this->kernels[idxThread]->init();
}
bottomPass();
upwardPass();
downardPass();
directPass();
FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
}
/** P2M */
void bottomPass(){
FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
FDEBUG( FDebug::Controller.write("\tStart Bottom Pass\n").write(FDebug::Flush) );
FDEBUG( counterTime.tic() );
FOctreeIterator octreeIterator(tree);
// Iterate on leafs
octreeIterator.gotoBottomLeft();
omp_lock_t mutex;
omp_init_lock(&mutex);
bool stop = false;
#pragma omp parallel shared(octreeIterator,mutex,stop) num_threads(FThreadNumbers)
{
const int threadId = omp_get_thread_num();
omp_set_lock(&mutex);
while(!stop){
CellClass*const cell = octreeIterator.getCurrentCell();
const FList<ParticleClass*>* const particles = octreeIterator.getCurrentListSources();
if(!octreeIterator.moveRight()) stop = true;
omp_unset_lock(&mutex);
// We need the current cell that represent the leaf
// and the list of particles
kernels[threadId]->P2M( cell , particles);
omp_set_lock(&mutex);
}
omp_unset_lock(&mutex);
}
omp_destroy_lock(&mutex);
FDEBUG( counterTime.tac() );
FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" );
FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
}
/** M2M */
void upwardPass(){
FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
FDEBUG( FDebug::Controller.write("\tStart Upward Pass\n").write(FDebug::Flush); );
FDEBUG( counterTime.tic() );
FOctreeIterator octreeIterator(tree);
octreeIterator.gotoBottomLeft();
octreeIterator.moveUp();
omp_lock_t mutex;
omp_init_lock(&mutex);
// for each levels
for(int idxLevel = OctreeHeight - 2 ; idxLevel > 1 ; --idxLevel ){
bool stop = false;
#pragma omp parallel shared(octreeIterator,mutex,stop) num_threads(FThreadNumbers)
{
const int threadId = omp_get_thread_num();
omp_set_lock(&mutex);
// for each cells
while(!stop){
// We need the current cell and the child
// child is an array (of 8 child) that may be null
CellClass*const cell = octreeIterator.getCurrentCell();
const CellClass*const*const child = octreeIterator.getCurrentChild();
if(!octreeIterator.moveRight()) stop = true;
omp_unset_lock(&mutex);
kernels[threadId]->M2M( cell , child, idxLevel);
omp_set_lock(&mutex);
}
omp_unset_lock(&mutex);
}
octreeIterator.moveUp();
octreeIterator.gotoLeft();
}
omp_destroy_lock(&mutex);
FDEBUG( counterTime.tac() );
FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" );
FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
}
/** M2L L2L */
void downardPass(){
FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
FDEBUG( FDebug::Controller.write("\tStart Downward Pass (M2L)\n").write(FDebug::Flush); );
FDEBUG( counterTime.tic() );
{ // first M2L
FOctreeIterator octreeIterator(tree);
octreeIterator.moveDown();
omp_lock_t mutex;