Commit 6151cdfa authored by berenger-bramas
Browse files

Improving iteration performance & adding simple threaded versions

git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/scalfmm/scalfmm/trunk@19 2616d619-271b-44dc-8df4-d4a8f33a7222
parent 5293efde
......@@ -213,6 +213,48 @@ public:
};
class ConstBasicIterator {
private:
const Node* iter; //< current node on the list
public:
/**
* Constructor needs the target list
* @param the list to iterate on
*/
ConstBasicIterator(const FList& list) : iter(list.root){
}
/** to progress on the list */
void progress(){
if(this->iter) this->iter = this->iter->next;
}
/**
* Current pointed value
* current iterator must be valide (isValide()) to use this function
*/
Object value(){
return this->iter->target;
}
/**
* Current pointed value
* current iterator must be valide (isValide()) to use this function
*/
const Object& value() const{
return this->iter->target;
}
/**
* To know if an iterator is at the end of the list
* @return true if the current iterator can progress and access to value, else false
*/
bool isValide() const{
return iter;
}
};
};
#endif //FLIST_HPP
......
......@@ -192,6 +192,10 @@ public:
this->currentLocalIndex = TransposeIndex(this->current.tree->getLeftLeafIndex(), (this->current.tree->getSubOctreeHeight() - this->currentLocalLevel - 1) );
}
Iterator() : currentLocalLevel(0), currentLocalIndex(0) {
current.tree = 0;
}
/** Copy constructor
* @param other source iterator to copy
*/
......@@ -206,7 +210,10 @@ public:
* @return this after copy
*/
Iterator& operator=(const Iterator& other){
    // Plain member-wise copy. The previous raw memcpy over *this was redundant
    // with these assignments and is undefined when other aliases *this
    // (overlapping source/destination), so it has been dropped.
    // NOTE(review): assumes current, currentLocalLevel and currentLocalIndex are
    // the complete iterator state (they are the members the default constructor
    // initialises) - confirm against the class declaration.
    this->current = other.current;
    this->currentLocalLevel = other.currentLocalLevel;
    this->currentLocalIndex = other.currentLocalIndex;
    return *this;
}
/**
......
......@@ -34,7 +34,7 @@
* @warning Give the particuleClass & cellClass
*/
template< class ParticuleClass, class CellClass >
class FAbstractSubOctree : public FAssertable{
class FAbstractSubOctree : protected FAssertable{
protected:
const int subOctreeHeight; //< Height of this suboctree
const int subOctreePosition; //< Level of the current suboctree in the global tree (0 if node)
......
......@@ -20,22 +20,22 @@ public:
virtual void init(){}
/** P2M */
virtual void P2M(CellClass* const pole, FList<ParticuleClass*>* const particules) = 0;
virtual void P2M(CellClass* const pole, const FList<ParticuleClass*>* const particules) = 0;
/** M2M */
virtual void M2M(CellClass* const pole, CellClass** const child, const int inLevel) = 0;
virtual void M2M(CellClass* const pole, const CellClass*const* const child, const int inLevel) = 0;
/** M2L */
virtual void M2L(CellClass* const pole, CellClass** const distantNeighbors, const int size, const int inLevel) = 0;
virtual void M2L(CellClass* const pole, const CellClass*const* const distantNeighbors, const int size, const int inLevel) = 0;
/** L2L */
virtual void L2L(CellClass* const pole, CellClass** const child, const int inLevel) = 0;
virtual void L2L(const CellClass* const pole, CellClass** const child, const int inLevel) = 0;
/** L2P */
virtual void L2P(CellClass* const pole, FList<ParticuleClass*>* const particules) = 0;
virtual void L2P(const CellClass* const pole, FList<ParticuleClass*>* const particules) = 0;
/** P2P */
virtual void P2P(FList<ParticuleClass*>* const pole, FList<ParticuleClass*>** const directNeighbors, const int size) = 0;
virtual void P2P(FList<ParticuleClass*>* const pole, const FList<ParticuleClass*>*const* const directNeighbors, const int size) = 0;
};
......
......@@ -26,33 +26,33 @@ public:
virtual void init(){}
/** Print the number of particules */
virtual void P2M(CellClass* const pole, FList<ParticuleClass*>* const particules) {
FDEBUG( FDebug::Controller << "P2M : " << particules->getSize() << "\n" );
virtual void P2M(CellClass* const pole, const FList<ParticuleClass*>* const particules) {
//FDEBUG( FDebug::Controller << "P2M : " << particules->getSize() << "\n" );
}
/** Print the morton index */
virtual void M2M(CellClass* const pole, CellClass** const child, const int inLevel) {
FDEBUG( FDebug::Controller << "M2M : " << pole->getMortonIndex() << "\n" );
virtual void M2M(CellClass* const pole, const CellClass*const* const child, const int inLevel) {
//FDEBUG( FDebug::Controller << "M2M : " << pole->getMortonIndex() << "\n" );
}
/** Print the morton index */
virtual void M2L(CellClass* const pole, CellClass** const distantNeighbors, const int size, const int inLevel) {
FDEBUG( FDebug::Controller << "M2L : " << pole->getMortonIndex() << " (" << size << ")\n" );
virtual void M2L(CellClass* const pole, const CellClass*const* const distantNeighbors, const int size, const int inLevel) {
//FDEBUG( FDebug::Controller << "M2L : " << pole->getMortonIndex() << " (" << size << ")\n" );
}
/** Print the morton index */
virtual void L2L(CellClass* const local, CellClass** const child, const int inLevel) {
FDEBUG( FDebug::Controller << "L2L : " << local->getMortonIndex() << "\n" );
virtual void L2L(const CellClass* const local, CellClass** const child, const int inLevel) {
//FDEBUG( FDebug::Controller << "L2L : " << local->getMortonIndex() << "\n" );
}
/** Print the number of particules */
virtual void L2P(CellClass* const pole, FList<ParticuleClass*>* const particules){
FDEBUG( FDebug::Controller << "L2P : " << particules->getSize() << "\n" );
virtual void L2P(const CellClass* const pole, FList<ParticuleClass*>* const particules){
//FDEBUG( FDebug::Controller << "L2P : " << particules->getSize() << "\n" );
}
/** Print the number of particules */
virtual void P2P(FList<ParticuleClass*>* const currentBox, FList<ParticuleClass*>** directNeighbors, const int size) {
FDEBUG( FDebug::Controller << "P2P : " << currentBox->getSize() << " (" << size << ")\n" );
virtual void P2P(FList<ParticuleClass*>* const currentBox, const FList<ParticuleClass*>*const* directNeighbors, const int size) {
//FDEBUG( FDebug::Controller << "P2P : " << currentBox->getSize() << " (" << size << ")\n" );
}
};
......
......@@ -19,7 +19,7 @@
* It just iterates on a tree and calls the kernels with the right arguments
*/
template<template< class ParticuleClass, class CellClass> class KernelClass, class ParticuleClass, class CellClass, int OctreeHeight, int SubtreeHeight>
class FFMMAlgorithm : public FAssertable{
class FFMMAlgorithm : protected FAssertable{
FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>* const tree; //< The octree to work on
KernelClass<ParticuleClass, CellClass>* const kernels; //< The kernels
......@@ -36,6 +36,7 @@ public:
: tree(inTree) , kernels(inKernels) {
assert(tree, "tree cannot be null", __LINE__, __FILE__);
assert(kernels, "kernels cannot be null", __LINE__, __FILE__);
FDEBUG_TRACE(FDebug::Controller.write("FFMMAlgorithm\n"));
}
/** Default destructor */
......@@ -82,6 +83,9 @@ public:
typename FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>::Iterator octreeIterator(tree);
octreeIterator.gotoBottomLeft();
octreeIterator.moveUp();
typename FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>::Iterator avoidGotoLeftIterator(octreeIterator);
// for each levels
for(int idxLevel = OctreeHeight - 2 ; idxLevel > 1 ; --idxLevel ){
// for each cells
......@@ -90,8 +94,10 @@ public:
// child is an array (of 8 child) that may be null
kernels->M2M( octreeIterator.getCurrentCell() , octreeIterator.getCurrentChild(), idxLevel);
} while(octreeIterator.moveRight());
octreeIterator.moveUp();
octreeIterator.gotoLeft();
//octreeIterator.moveUp();
//octreeIterator.gotoLeft();
avoidGotoLeftIterator.moveUp();
octreeIterator = avoidGotoLeftIterator;
}
FDEBUG_TIME(counter.tac(););
......@@ -106,16 +112,21 @@ public:
{ // first M2L
typename FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>::Iterator octreeIterator(tree);
octreeIterator.moveDown();
typename FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>::Iterator avoidGotoLeftIterator(octreeIterator);
CellClass* neighbors[208];
// for each levels
for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
// for each cells
do{
const int counter = tree->getDistantNeighbors(neighbors, octreeIterator.getCurrentGlobalIndex(),idxLevel);
kernels->M2L( octreeIterator.getCurrentCell() , neighbors, counter, idxLevel);
if(counter) kernels->M2L( octreeIterator.getCurrentCell() , neighbors, counter, idxLevel);
} while(octreeIterator.moveRight());
octreeIterator.gotoLeft();
octreeIterator.moveDown();
//octreeIterator.gotoLeft();
//octreeIterator.moveDown();
avoidGotoLeftIterator.moveDown();
octreeIterator = avoidGotoLeftIterator;
}
}
FDEBUG_TIME(counter.tac(););
......@@ -126,6 +137,9 @@ public:
{ // second L2L
typename FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>::Iterator octreeIterator(tree);
octreeIterator.moveDown();
typename FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>::Iterator avoidGotoLeftIterator(octreeIterator);
const int heightMinusOne = OctreeHeight - 1;
// for each level except the leaf level
for(int idxLevel = 2 ; idxLevel < heightMinusOne ; ++idxLevel ){
......@@ -133,8 +147,10 @@ public:
do{
kernels->L2L( octreeIterator.getCurrentCell() , octreeIterator.getCurrentChild(), idxLevel);
} while(octreeIterator.moveRight());
octreeIterator.gotoLeft();
octreeIterator.moveDown();
//octreeIterator.gotoLeft();
//octreeIterator.moveDown();
avoidGotoLeftIterator.moveDown();
octreeIterator = avoidGotoLeftIterator;
}
}
......@@ -148,6 +164,8 @@ public:
FDEBUG_TRACE( FDebug::Controller.write("\tStart Direct Pass\n").write(FDebug::Flush); );
FDEBUG_TIME(counter.tic(););
const int heightMinusOne = OctreeHeight - 1;
typename FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>::Iterator octreeIterator(tree);
octreeIterator.gotoBottomLeft();
// There is a maximum of 26 neighbors
......@@ -156,7 +174,7 @@ public:
do{
kernels->L2P(octreeIterator.getCurrentCell(), octreeIterator.getCurrentList());
// need the current particules and neighbors particules
const int counter = tree->getLeafsNeighbors(neighbors, octreeIterator.getCurrentGlobalIndex(),OctreeHeight-1);
const int counter = tree->getLeafsNeighbors(neighbors, octreeIterator.getCurrentGlobalIndex(),heightMinusOne);
kernels->P2P( octreeIterator.getCurrentList() , neighbors, counter);
} while(octreeIterator.moveRight());
......
#ifndef FFMMALGORITHMTHREADED_HPP
#define FFMMALGORITHMTHREADED_HPP
// /!\ Please, you must read the license at the bottom of this page
#include "../Utils/FAssertable.hpp"
#include "../Utils/FDebug.hpp"
#include "../Utils/FTic.hpp"
#include "../Utils/FOpenMPThread.hpp"
#include "../Containers/FOctree.hpp"
#include <omp.h>
/**
* @author Berenger Bramas (berenger.bramas@inria.fr)
* @class FFMMAlgorithmThreaded
* @brief
* Please read the license
*
* Runs the FMM passes (P2M, M2M, M2L, L2L, L2P and P2P) on an octree inside
* OpenMP parallel regions of at most NbThreads threads.
* In every pass a single octree iterator is shared by the team and protected
* by an OpenMP lock: a thread takes the lock, reads the data of the current
* cell, moves the iterator to the right, releases the lock and then calls its
* own kernel object (kernels[threadId]) outside of the lock.
*/
template<template< class ParticuleClass, class CellClass> class KernelClass, class ParticuleClass, class CellClass, int OctreeHeight, int SubtreeHeight>
class FFMMAlgorithmThreaded : protected FAssertable{
    typedef typename FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>::Iterator FOctreeIterator;

    static const int NbThreads = 4;                                     //< Size of the OpenMP team and of the kernels array

    FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>* const tree;    //< The octree to work on
    KernelClass<ParticuleClass, CellClass>* kernels[NbThreads];         //< The kernels, one copy per thread (owned by this object)

    FDEBUG_TIME(FTic counter);                                          //< In case of debug count the time

    // This object owns the kernel copies and deletes them in its destructor,
    // so copying it would delete every kernel twice.
    // Copy operations are declared private and intentionally left undefined.
    FFMMAlgorithmThreaded(const FFMMAlgorithmThreaded&);
    FFMMAlgorithmThreaded& operator=(const FFMMAlgorithmThreaded&);

public:
    /** The constructor need the octree and the kernels used for computation
    * @param inTree the octree
    * @param inKernels the kernels, copied once per thread (the original is not kept)
    * an assert is launched if one of the arguments is null
    */
    FFMMAlgorithmThreaded(FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>* const inTree,
                          KernelClass<ParticuleClass, CellClass>* const inKernels)
        : tree(inTree) {
        assert(tree, "tree cannot be null", __LINE__, __FILE__);
        // Check the argument and not the member array: the array is never null,
        // whereas inKernels is dereferenced just below
        assert(inKernels, "kernels cannot be null", __LINE__, __FILE__);

        for(int idxThread = 0 ; idxThread < NbThreads ; ++idxThread){
            this->kernels[idxThread] = new KernelClass<ParticuleClass, CellClass>(*inKernels);
        }

        FDEBUG_TRACE(FDebug::Controller.write("FFMMAlgorithmThreaded\n"));
    }

    /** Default destructor, releases the per-thread kernel copies */
    virtual ~FFMMAlgorithmThreaded(){
        for(int idxThread = 0 ; idxThread < NbThreads ; ++idxThread){
            delete this->kernels[idxThread];
        }
    }

    /** To execute the fmm algorithm
    * Call this function to run the complete algo
    * Every kernel copy is initialised, then the bottom, upward, downward
    * and direct passes are run in that order
    */
    void execute(){
        for(int idxThread = 0 ; idxThread < NbThreads ; ++idxThread){
            this->kernels[idxThread]->init();
        }

        bottomPass();
        upwardPass();
        downardPass();
        directPass();
    }

    /** P2M : called once per cell of the bottom level with its particules list */
    void bottomPass(){
        FDEBUG_TRACE( FDebug::Controller.write("\tStart Bottom Pass\n").write(FDebug::Flush) );
        FDEBUG_TIME(counter.tic(););

        FOctreeIterator octreeIterator(tree);
        // Iterate on leafs
        octreeIterator.gotoBottomLeft();

        omp_lock_t mutex;
        omp_init_lock(&mutex);
        bool stop = false;  // set (under the lock) once moveRight() fails

        #pragma omp parallel shared(octreeIterator,mutex,stop) num_threads(NbThreads)
        {
            const int threadId = omp_get_thread_num();
            omp_set_lock(&mutex);
            while(!stop){
                // Read everything needed from the shared iterator while the lock is held
                CellClass*const cell = octreeIterator.getCurrentCell();
                const FList<ParticuleClass*>* const particules = octreeIterator.getCurrentList();
                if(!octreeIterator.moveRight()) stop = true;
                omp_unset_lock(&mutex);

                // We need the current cell that represent the leaf
                // and the list of particules
                kernels[threadId]->P2M( cell , particules);

                // Take the lock again before testing stop and touching the iterator
                omp_set_lock(&mutex);
            }
            omp_unset_lock(&mutex);
        }

        omp_destroy_lock(&mutex);

        FDEBUG_TIME(counter.tac(););
        FDEBUG_TRACE( FDebug::Controller << "\tFinished (") FDEBUG_TIME(<< counter.elapsed() <<) FDEBUG_TRACE("s)\n"; )
    }

    /** M2M : from level OctreeHeight - 2 up to level 2, called once per cell with its child array */
    void upwardPass(){
        FDEBUG_TRACE( FDebug::Controller.write("\tStart Upward Pass\n").write(FDebug::Flush); );
        FDEBUG_TIME(counter.tic(););

        FOctreeIterator octreeIterator(tree);
        octreeIterator.gotoBottomLeft();
        octreeIterator.moveUp();

        omp_lock_t mutex;
        omp_init_lock(&mutex);

        // for each levels
        for(int idxLevel = OctreeHeight - 2 ; idxLevel > 1 ; --idxLevel ){
            bool stop = false;  // set (under the lock) once moveRight() fails
            #pragma omp parallel shared(octreeIterator,mutex,stop) num_threads(NbThreads)
            {
                const int threadId = omp_get_thread_num();
                omp_set_lock(&mutex);
                // for each cells
                while(!stop){
                    // We need the current cell and the child
                    // child is an array (of 8 child) that may be null
                    CellClass*const cell = octreeIterator.getCurrentCell();
                    const CellClass*const*const child = octreeIterator.getCurrentChild();
                    if(!octreeIterator.moveRight()) stop = true;
                    omp_unset_lock(&mutex);

                    kernels[threadId]->M2M( cell , child, idxLevel);

                    omp_set_lock(&mutex);
                }
                omp_unset_lock(&mutex);
            }
            // Back in sequential code: go one level up and restart from the left
            octreeIterator.moveUp();
            octreeIterator.gotoLeft();
        }

        omp_destroy_lock(&mutex);

        FDEBUG_TIME(counter.tac(););
        FDEBUG_TRACE( FDebug::Controller << "\tFinished (") FDEBUG_TIME(<< counter.elapsed() <<) FDEBUG_TRACE("s)\n"; )
    }

    /** M2L L2L : both sub-passes go from level 2 downward, M2L first then L2L */
    void downardPass(){
        FDEBUG_TRACE( FDebug::Controller.write("\tStart Downward Pass (M2L)\n").write(FDebug::Flush); );
        FDEBUG_TIME(counter.tic(););

        { // first M2L
            FOctreeIterator octreeIterator(tree);
            octreeIterator.moveDown();

            omp_lock_t mutex;
            omp_init_lock(&mutex);

            // for each levels
            for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
                bool stop = false;  // set (under the lock) once moveRight() fails
                #pragma omp parallel shared(octreeIterator,mutex,idxLevel,stop) num_threads(NbThreads)
                {
                    const int threadId = omp_get_thread_num();
                    // Per-thread buffer filled by getDistantNeighbors
                    CellClass* neighbors[208];
                    omp_set_lock(&mutex);
                    // for each cells
                    while(!stop){
                        CellClass* const cell = octreeIterator.getCurrentCell();
                        const MortonIndex cellIndex = octreeIterator.getCurrentGlobalIndex();
                        if(!octreeIterator.moveRight()) stop = true;
                        omp_unset_lock(&mutex);

                        // The neighbors lookup is done outside of the lock, from the saved index
                        const int counter = tree->getDistantNeighbors(neighbors, cellIndex,idxLevel);
                        // The kernel is skipped when no neighbor has been found
                        if(counter) kernels[threadId]->M2L( cell, neighbors, counter, idxLevel);

                        omp_set_lock(&mutex);
                    }
                    omp_unset_lock(&mutex);
                }
                // Back in sequential code: restart from the left, one level down
                octreeIterator.gotoLeft();
                octreeIterator.moveDown();
            }

            omp_destroy_lock(&mutex);
        }

        FDEBUG_TIME(counter.tac(););
        FDEBUG_TRACE( FDebug::Controller << "\tFinished (") FDEBUG_TIME(<< counter.elapsed() <<) FDEBUG_TRACE("s)\n"; )

        FDEBUG_TRACE( FDebug::Controller.write("\tStart Downward Pass (L2L)\n").write(FDebug::Flush); );
        FDEBUG_TIME(counter.tic(););

        { // second L2L
            FOctreeIterator octreeIterator(tree);
            octreeIterator.moveDown();

            const int heightMinusOne = OctreeHeight - 1;

            omp_lock_t mutex;
            omp_init_lock(&mutex);

            // for each level except the leaf level
            for(int idxLevel = 2 ; idxLevel < heightMinusOne ; ++idxLevel ){
                bool stop = false;  // set (under the lock) once moveRight() fails
                #pragma omp parallel shared(octreeIterator,mutex,idxLevel,stop) num_threads(NbThreads)
                {
                    const int threadId = omp_get_thread_num();
                    omp_set_lock(&mutex);
                    // for each cells
                    while(!stop){
                        const CellClass * const cell = octreeIterator.getCurrentCell();
                        CellClass ** const child = octreeIterator.getCurrentChild();
                        if(!octreeIterator.moveRight()) stop = true;
                        omp_unset_lock(&mutex);

                        kernels[threadId]->L2L( cell, child, idxLevel);

                        omp_set_lock(&mutex);
                    }
                    omp_unset_lock(&mutex);
                }
                // Back in sequential code: restart from the left, one level down
                octreeIterator.gotoLeft();
                octreeIterator.moveDown();
            }

            omp_destroy_lock(&mutex);
        }

        FDEBUG_TIME(counter.tac(););
        FDEBUG_TRACE( FDebug::Controller << "\tFinished (") FDEBUG_TIME(<< counter.elapsed() <<) FDEBUG_TRACE("s)\n"; )
    }

    /** P2P : for each cell of the bottom level, L2P is called first, then P2P with the neighbors lists */
    void directPass(){
        FDEBUG_TRACE( FDebug::Controller.write("\tStart Direct Pass\n").write(FDebug::Flush); );
        FDEBUG_TIME(counter.tic(););

        FOctreeIterator octreeIterator(tree);
        octreeIterator.gotoBottomLeft();

        omp_lock_t mutex;
        omp_init_lock(&mutex);
        bool stop = false;  // set (under the lock) once moveRight() fails

        #pragma omp parallel shared(octreeIterator,mutex,stop) num_threads(NbThreads)
        {
            const int threadId = omp_get_thread_num();
            // There is a maximum of 26 neighbors
            FList<ParticuleClass*>* neighbors[26];
            const int heightMinusOne = OctreeHeight - 1;

            omp_set_lock(&mutex);
            // for each leafs
            while(!stop){
                const CellClass * const cell = octreeIterator.getCurrentCell();
                FList<ParticuleClass*>* const particules = octreeIterator.getCurrentList();
                const MortonIndex cellIndex = octreeIterator.getCurrentGlobalIndex();
                if(!octreeIterator.moveRight()) stop = true;
                omp_unset_lock(&mutex);

                kernels[threadId]->L2P(cell, particules);
                // need the current particules and neighbors particules
                const int counter = tree->getLeafsNeighbors(neighbors, cellIndex,heightMinusOne);
                kernels[threadId]->P2P( particules , neighbors, counter);

                omp_set_lock(&mutex);
            }
            omp_unset_lock(&mutex);
        }

        omp_destroy_lock(&mutex);

        FDEBUG_TIME(counter.tac(););
        FDEBUG_TRACE( FDebug::Controller << "\tFinished (") FDEBUG_TIME(<< counter.elapsed() <<) FDEBUG_TRACE("s)\n"; )
    }
};
#endif //FFMMALGORITHMTHREADED_HPP
// [--LICENSE--]
#ifndef FFMMALGORITHMTHREADEDTHREADED_HPP
#define FFMMALGORITHMTHREADEDTHREADED_HPP
// /!\ Please, you must read the license at the bottom of this page
#include "../Utils/FAssertable.hpp"
#include "../Utils/FDebug.hpp"
#include "../Utils/FTic.hpp"
#include "../Utils/FOpenMPThread.hpp"
#include "../Containers/FOctree.hpp"
#include <omp.h>
/**
* @author Berenger Bramas (berenger.bramas@inria.fr)
* @class FFMMAlgorithmThreadedInterval
* @brief
* Please read the license
*
*/
template<template< class ParticuleClass, class CellClass> class KernelClass, class ParticuleClass, class CellClass, int OctreeHeight, int SubtreeHeight>
class FFMMAlgorithmThreadedInterval : protected FAssertable{
typedef typename FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>::Iterator FOctreeIterator;
static const int NbThreads = 1;
static const int SizeInterval = 50;
FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>* const tree; //< The octree to work on
KernelClass<ParticuleClass, CellClass>* kernels[NbThreads]; //< The kernels
FDEBUG_TIME(FTic counter); //< In case of debug count the time
public:
/** The constructor need the octree and the kernels used for computation
* @param inTree the octree
* @param inKernels the kernels
* an assert is launched if one of the arguments is null
*/
FFMMAlgorithmThreadedInterval(FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>* const inTree,
KernelClass<ParticuleClass, CellClass>* const inKernels)
: tree(inTree) {
assert(tree, "tree cannot be null", __LINE__, __FILE__);
assert(kernels, "kernels cannot be null", __LINE__, __FILE__);
for(int idxThread = 0 ; idxThread < NbThreads ; ++idxThread){
this->kernels[idxThread] = new KernelClass<ParticuleClass, CellClass>();