Commit 184e6682 authored by berenger-bramas

Clean cmake & Fmm algo

(adding comments etc.)

git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/scalfmm/scalfmm/trunk@65 2616d619-271b-44dc-8df4-d4a8f33a7222
parent ce429500
......@@ -9,10 +9,10 @@ endif(insource)
# Options
OPTION( SCALFMM_USE_CBLAS "Set to ON to build ScaFMM with BLAS" ON )
OPTION( SCALFMM_USE_MPI "Set to ON to build ScaFMM with MPI" ON )
OPTION( BUILD_TESTS "Set to ON to build fonctionnalities Tests" ON )
OPTION( BUILD_UTESTS "Set to ON to build UTests" ON )
OPTION( SCALFMM_BUILD_TESTS "Set to ON to build fonctionnalities Tests" ON )
OPTION( SCALFMM_BUILD_UTESTS "Set to ON to build UTests" ON )
# MPI option
# MPI option has to be set before project
if( SCALFMM_USE_MPI )
SET(CMAKE_CXX_COMPILER mpicxx)
endif()
......@@ -42,14 +42,14 @@ CONFIGURE_FILE( ${CMAKE_SOURCE_DIR}/Src/ScalFmmConfig.h.cmake
add_subdirectory(Src)
# Build - Tests
MESSAGE( STATUS "BUILD_TESTS = ${BUILD_TESTS}" )
if( BUILD_TESTS )
MESSAGE( STATUS "SCALFMM_BUILD_TESTS = ${SCALFMM_BUILD_TESTS}" )
if( SCALFMM_BUILD_TESTS )
add_subdirectory(Tests)
endif()
# Build - UTests
MESSAGE( STATUS "BUILD_UTESTS = ${BUILD_UTESTS}" )
if( BUILD_UTESTS )
MESSAGE( STATUS "SCALFMM_BUILD_UTESTS = ${SCALFMM_BUILD_UTESTS}" )
if( SCALFMM_BUILD_UTESTS )
add_subdirectory(UTests)
endif()
......
......@@ -20,6 +20,6 @@ add_library(
# Adding the entire project dir as an include dir
INCLUDE_DIRECTORIES(
${CMAKE_BINARY_DIR}/Sources
${CMAKE_BINARY_DIR}/Src
)
......@@ -33,9 +33,6 @@ class FFmmAlgorithm : protected FAssertable{
Octree* const tree; //< The octree to work on
KernelClass<ParticleClass, CellClass, OctreeHeight>* const kernels; //< The kernels
FDEBUG(FTic counterTime); //< In case of debug: to count the elapsed time
FDEBUG(FTic computationCounter); //< In case of debug: to count computation time
public:
/** The constructor need the octree and the kernels used for computation
* @param inTree the octree to work on
......@@ -62,23 +59,26 @@ public:
void execute(){
// Runs the whole algorithm: calls init() on the kernels, then the four passes in the fixed order below.
FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
kernels->init();
bottomPass(); // P2M
upwardPass(); // M2M
downardPass(); // M2L then L2L
directPass(); // L2P and P2P
FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
}
/////////////////////////////////////////////////////////////////////////////
// P2M
/////////////////////////////////////////////////////////////////////////////
/** P2M */
void bottomPass(){
FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
FDEBUG( FDebug::Controller.write("\tStart Bottom Pass\n").write(FDebug::Flush) );
FDEBUG( counterTime.tic() );
FDEBUG( double totalComputation = 0 );
FDEBUG(FTic counterTime);
FDEBUG(FTic computationCounter);
FOctreeIterator octreeIterator(tree);
......@@ -90,21 +90,23 @@ public:
FDEBUG(computationCounter.tic());
kernels->P2M( octreeIterator.getCurrentCell() , octreeIterator.getCurrentListSrc());
FDEBUG(computationCounter.tac());
FDEBUG(totalComputation += computationCounter.elapsed());
} while(octreeIterator.moveRight());
FDEBUG( counterTime.tac() );
FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" );
FDEBUG( FDebug::Controller << "\t\t Computation : " << totalComputation << " s\n" );
FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.tacAndElapsed() << "s)\n" );
FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" );
FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
}
/////////////////////////////////////////////////////////////////////////////
// Upward
/////////////////////////////////////////////////////////////////////////////
/** M2M */
void upwardPass(){
FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
FDEBUG( FDebug::Controller.write("\tStart Upward Pass\n").write(FDebug::Flush); );
FDEBUG( counterTime.tic() );
FDEBUG( double totalComputation = 0 );
FDEBUG(FTic counterTime);
FDEBUG(FTic computationCounter);
// Start from leaf level - 1
FOctreeIterator octreeIterator(tree);
......@@ -122,27 +124,31 @@ public:
FDEBUG(computationCounter.tic());
kernels->M2M( octreeIterator.getCurrentCell() , octreeIterator.getCurrentChild(), idxLevel);
FDEBUG(computationCounter.tac());
FDEBUG(totalComputation += computationCounter.elapsed());
} while(octreeIterator.moveRight());
avoidGotoLeftIterator.moveUp();
octreeIterator = avoidGotoLeftIterator;// equal octreeIterator.moveUp(); octreeIterator.gotoLeft();
}
FDEBUG( counterTime.tac() );
FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" );
FDEBUG( FDebug::Controller << "\t\t Computation : " << totalComputation << " s\n" );
FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.tacAndElapsed() << "s)\n" );
FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" );
FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
}
/////////////////////////////////////////////////////////////////////////////
// Downward
/////////////////////////////////////////////////////////////////////////////
/** M2L L2L */
void downardPass(){
FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
FDEBUG( FDebug::Controller.write("\tStart Downward Pass (M2L)\n").write(FDebug::Flush); );
FDEBUG( counterTime.tic() );
FDEBUG( double totalComputation = 0 );
{ // first M2L
FDEBUG( FDebug::Controller.write("\tStart Downward Pass (M2L)\n").write(FDebug::Flush); );
FDEBUG(FTic counterTime);
FDEBUG(FTic computationCounter);
FOctreeIterator octreeIterator(tree);
octreeIterator.moveDown();
......@@ -153,25 +159,24 @@ public:
for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
// for each cells
do{
FDEBUG(computationCounter.tic());
const int counter = tree->getDistantNeighbors(neighbors, octreeIterator.getCurrentGlobalIndex(),idxLevel);
FDEBUG(computationCounter.tic());
if(counter) kernels->M2L( octreeIterator.getCurrentCell() , neighbors, counter, idxLevel);
FDEBUG(computationCounter.tac());
FDEBUG(totalComputation += computationCounter.elapsed());
} while(octreeIterator.moveRight());
avoidGotoLeftIterator.moveDown();
octreeIterator = avoidGotoLeftIterator;
}
FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.tacAndElapsed() << "s)\n" );
FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" );
}
FDEBUG( counterTime.tac() );
FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" );
FDEBUG( FDebug::Controller << "\t\t Computation : " << totalComputation << " s\n" );
FDEBUG( FDebug::Controller.write("\tStart Downward Pass (L2L)\n").write(FDebug::Flush); );
FDEBUG( counterTime.tic() );
FDEBUG( totalComputation = 0 );
{ // second L2L
{ // second L2L
FDEBUG( FDebug::Controller.write("\tStart Downward Pass (L2L)\n").write(FDebug::Flush); );
FDEBUG(FTic counterTime);
FDEBUG(FTic computationCounter );
FOctreeIterator octreeIterator(tree);
octreeIterator.moveDown();
......@@ -185,26 +190,29 @@ public:
FDEBUG(computationCounter.tic());
kernels->L2L( octreeIterator.getCurrentCell() , octreeIterator.getCurrentChild(), idxLevel);
FDEBUG(computationCounter.tac());
FDEBUG(totalComputation += computationCounter.elapsed());
} while(octreeIterator.moveRight());
avoidGotoLeftIterator.moveDown();
octreeIterator = avoidGotoLeftIterator;
}
FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.tacAndElapsed() << "s)\n" );
FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" );
}
FDEBUG( counterTime.tac() );
FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" );
FDEBUG( FDebug::Controller << "\t\t Computation : " << totalComputation << " s\n" );
FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
}
/////////////////////////////////////////////////////////////////////////////
// Direct
/////////////////////////////////////////////////////////////////////////////
/** P2P */
void directPass(){
FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
FDEBUG( FDebug::Controller.write("\tStart Direct Pass\n").write(FDebug::Flush); );
FDEBUG( counterTime.tic() );
FDEBUG( double totalComputation = 0 );
FDEBUG(FTic counterTime);
FDEBUG(FTic computationCounter);
const int heightMinusOne = OctreeHeight - 1;
......@@ -216,16 +224,17 @@ public:
do{
FDEBUG(computationCounter.tic());
kernels->L2P(octreeIterator.getCurrentCell(), octreeIterator.getCurrentListTargets());
FDEBUG(computationCounter.tac());
// need the current particles and neighbors particles
const int counter = tree->getLeafsNeighbors(neighbors, octreeIterator.getCurrentGlobalIndex(),heightMinusOne);
FDEBUG(computationCounter.tic());
kernels->P2P( octreeIterator.getCurrentListTargets(), octreeIterator.getCurrentListSrc() , neighbors, counter);
FDEBUG(computationCounter.tac());
FDEBUG(totalComputation += computationCounter.elapsed());
} while(octreeIterator.moveRight());
FDEBUG( counterTime.tac() );
FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" );
FDEBUG( FDebug::Controller << "\t\t Computation : " << totalComputation << " s\n" );
FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.tacAndElapsed() << "s)\n" );
FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" );
FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
}
......
......@@ -40,15 +40,14 @@ class FFmmAlgorithmThread : protected FAssertable{
typedef KernelClass<ParticleClass, CellClass, OctreeHeight> Kernel;
Octree* const tree; //< The octree to work on
Kernel* kernels[FThreadNumbers]; //< The kernels
FDEBUG(FTic counterTime); //< In case of debug: to count the elapsed time
FDEBUG(FTic computationCounter); //< In case of debug: to count computation time
Kernel** kernels; //< The kernels
OctreeIterator* iterArray;
static const int SizeShape = 3*3*3;
int shapeLeaf[SizeShape];
int shapeLeaf[SizeShape];
const int MaxThreads;
public:
/** The constructor need the octree and the kernels used for computation
......@@ -57,12 +56,12 @@ public:
* An assert is launched if one of the arguments is null
*/
FFmmAlgorithmThread(Octree* const inTree, Kernel* const inKernels)
: tree(inTree) , iterArray(0) {
: tree(inTree) , kernels(0), iterArray(0), MaxThreads(omp_get_max_threads()) {
assert(tree, "tree cannot be null", __LINE__, __FILE__);
// Validate the constructor argument (it is dereferenced below to copy-construct the per-thread kernels);
// the member 'kernels' is not what the caller passed and must not be the thing tested here.
assert(inKernels, "kernels cannot be null", __LINE__, __FILE__);
for(int idxThread = 0 ; idxThread < FThreadNumbers ; ++idxThread){
this->kernels = new Kernel*[MaxThreads];
for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
this->kernels[idxThread] = new KernelClass<ParticleClass, CellClass, OctreeHeight>(*inKernels);
}
......@@ -71,9 +70,10 @@ public:
/** Default destructor */
virtual ~FFmmAlgorithmThread(){
for(int idxThread = 0 ; idxThread < FThreadNumbers ; ++idxThread){
for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
delete this->kernels[idxThread];
}
delete [] this->kernels;
}
/**
......@@ -103,10 +103,6 @@ public:
iterArray = new OctreeIterator[leafs];
assert(iterArray, "iterArray bad alloc", __LINE__, __FILE__);
for(int idxThread = 0 ; idxThread < FThreadNumbers ; ++idxThread){
this->kernels[idxThread]->init();
}
bottomPass();
upwardPass();
......@@ -120,11 +116,15 @@ public:
FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
}
/////////////////////////////////////////////////////////////////////////////
// P2M
/////////////////////////////////////////////////////////////////////////////
/** P2M */
void bottomPass(){
FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
FDEBUG( FDebug::Controller.write("\tStart Bottom Pass\n").write(FDebug::Flush) );
FDEBUG( counterTime.tic() );
FDEBUG(FTic counterTime);
OctreeIterator octreeIterator(tree);
int leafs = 0;
......@@ -135,8 +135,8 @@ public:
++leafs;
} while(octreeIterator.moveRight());
FDEBUG(computationCounter.tic());
#pragma omp parallel num_threads(FThreadNumbers)
FDEBUG(FTic computationCounter);
#pragma omp parallel
{
Kernel * const myThreadkernels = kernels[omp_get_thread_num()];
#pragma omp for
......@@ -146,20 +146,23 @@ public:
myThreadkernels->P2M( iterArray[idxLeafs].getCurrentCell() , iterArray[idxLeafs].getCurrentListSrc());
}
}
FDEBUG(computationCounter.tac());
FDEBUG(computationCounter.tac() );
FDEBUG( counterTime.tac() );
FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" );
FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.tacAndElapsed() << "s)\n" );
FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.elapsed() << " s\n" );
FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
}
/////////////////////////////////////////////////////////////////////////////
// Upward
/////////////////////////////////////////////////////////////////////////////
/** M2M */
void upwardPass(){
FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
FDEBUG( FDebug::Controller.write("\tStart Upward Pass\n").write(FDebug::Flush); );
FDEBUG( counterTime.tic() );
FDEBUG( double totalComputation = 0 );
FDEBUG(FTic counterTime);
FDEBUG(FTic computationCounter);
// Start from leaf level - 1
OctreeIterator octreeIterator(tree);
......@@ -179,7 +182,7 @@ public:
octreeIterator = avoidGotoLeftIterator;// equal octreeIterator.moveUp(); octreeIterator.gotoLeft();
FDEBUG(computationCounter.tic());
#pragma omp parallel num_threads(FThreadNumbers)
#pragma omp parallel
{
Kernel * const myThreadkernels = kernels[omp_get_thread_num()];
#pragma omp for
......@@ -190,23 +193,27 @@ public:
}
}
FDEBUG(computationCounter.tac());
FDEBUG(totalComputation += computationCounter.elapsed());
}
FDEBUG( counterTime.tac() );
FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" );
FDEBUG( FDebug::Controller << "\t\t Computation : " << totalComputation << " s\n" );
FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.tacAndElapsed() << "s)\n" );
FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" );
FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
}
/////////////////////////////////////////////////////////////////////////////
// Downward
/////////////////////////////////////////////////////////////////////////////
/** M2L L2L */
void downardPass(){
FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
FDEBUG( FDebug::Controller.write("\tStart Downward Pass (M2L)\n").write(FDebug::Flush); );
FDEBUG( counterTime.tic() );
FDEBUG( double totalComputation = 0 );
{ // first M2L
FDEBUG( FDebug::Controller.write("\tStart Downward Pass (M2L)\n").write(FDebug::Flush); );
FDEBUG(FTic counterTime);
FDEBUG(FTic computationCounter);
OctreeIterator octreeIterator(tree);
octreeIterator.moveDown();
OctreeIterator avoidGotoLeftIterator(octreeIterator);
......@@ -223,7 +230,7 @@ public:
octreeIterator = avoidGotoLeftIterator;
FDEBUG(computationCounter.tic());
#pragma omp parallel num_threads(FThreadNumbers)
#pragma omp parallel
{
Kernel * const myThreadkernels = kernels[omp_get_thread_num()];
CellClass* neighbors[208];
......@@ -234,17 +241,17 @@ public:
}
}
FDEBUG(computationCounter.tac());
FDEBUG(totalComputation += computationCounter.elapsed());
}
FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.tacAndElapsed() << "s)\n" );
FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" );
}
FDEBUG( counterTime.tac() );
FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" );
FDEBUG( FDebug::Controller << "\t\t Computation : " << totalComputation << " s\n" );
FDEBUG( FDebug::Controller.write("\tStart Downward Pass (L2L)\n").write(FDebug::Flush); );
FDEBUG( counterTime.tic() );
FDEBUG( totalComputation = 0 );
{ // second L2L
FDEBUG( FDebug::Controller.write("\tStart Downward Pass (L2L)\n").write(FDebug::Flush); );
FDEBUG(FTic counterTime);
FDEBUG(FTic computationCounter);
OctreeIterator octreeIterator(tree);
octreeIterator.moveDown();
......@@ -263,7 +270,7 @@ public:
octreeIterator = avoidGotoLeftIterator;
FDEBUG(computationCounter.tic());
#pragma omp parallel num_threads(FThreadNumbers)
#pragma omp parallel
{
Kernel * const myThreadkernels = kernels[omp_get_thread_num()];
#pragma omp for
......@@ -272,21 +279,25 @@ public:
}
}
FDEBUG(computationCounter.tac());
FDEBUG(totalComputation += computationCounter.elapsed());
}
FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.tacAndElapsed() << "s)\n" );
FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" );
}
FDEBUG( counterTime.tac() );
FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" );
FDEBUG( FDebug::Controller << "\t\t Computation : " << totalComputation << " s\n" );
FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
}
/////////////////////////////////////////////////////////////////////////////
// Direct
/////////////////////////////////////////////////////////////////////////////
/** P2P */
void directPass(){
FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
FDEBUG( FDebug::Controller.write("\tStart Direct Pass\n").write(FDebug::Flush); );
FDEBUG( counterTime.tic() );
FDEBUG(FTic counterTime);
FDEBUG(FTic computationCounter);
OctreeIterator* shapeArray[SizeShape];
int countShape[SizeShape];
......@@ -317,7 +328,7 @@ public:
FDEBUG(computationCounter.tic());
for(int idxShape = 0 ; idxShape < SizeShape ; ++idxShape){
const int leafAtThisShape = this->shapeLeaf[idxShape];
#pragma omp parallel num_threads(FThreadNumbers)
#pragma omp parallel
{
Kernel * const myThreadkernels = kernels[omp_get_thread_num()];
// There is a maximum of 26 neighbors
......@@ -338,9 +349,8 @@ public:
delete [] shapeArray[idxShape];
}
FDEBUG( counterTime.tac() );
FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" );
FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.elapsed() << " s\n" );
FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.tacAndElapsed() << "s)\n" );
FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" );
FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
}
......
......@@ -44,20 +44,17 @@ typedef FSingleApplication ApplicationImplementation;
* schedule(runtime)
*/
template<template< class ParticleClass, class CellClass, int OctreeHeight> class KernelClass,
class ParticleClass, class CellClass,
template<class ParticleClass> class LeafClass,
int OctreeHeight, int SubtreeHeight>
class FFmmAlgorithmThreadProc : protected FAssertable, protected ApplicationImplementation{
class ParticleClass, class CellClass,
template<class ParticleClass> class LeafClass,
int OctreeHeight, int SubtreeHeight>
class FFmmAlgorithmThreadProc : protected FAssertable, protected ApplicationImplementation{
// To reduce the size of variable type based on foctree in this file
typedef FOctree<ParticleClass, CellClass, LeafClass, OctreeHeight, SubtreeHeight> Octree;
typedef typename FOctree<ParticleClass, CellClass,LeafClass, OctreeHeight, SubtreeHeight>::Iterator OctreeIterator;
typedef KernelClass<ParticleClass, CellClass, OctreeHeight> Kernel;
Octree* const tree; //< The octree to work on
Kernel* kernels[FThreadNumbers]; //< The kernels
FDEBUG(FTic counterTime); //< In case of debug: to count the elapsed time
FDEBUG(FTic computationCounter); //< In case of debug: to count computation time
Kernel** kernels; //< The kernels
OctreeIterator* iterArray;
OctreeIterator* previousIterArray;
......@@ -69,6 +66,8 @@ class FFmmAlgorithmThreadProc : protected FAssertable, protected ApplicationImpl
int leftOffsets[OctreeHeight];
int rightOffsets[OctreeHeight];
const int MaxThreads;
void run(){}
void swapArray(){
......@@ -84,13 +83,14 @@ public:
* An assert is launched if one of the arguments is null
*/
FFmmAlgorithmThreadProc(Octree* const inTree, Kernel* const inKernels, const int inArgc, char ** const inArgv )
: ApplicationImplementation(inArgc,inArgv), tree(inTree) , iterArray(0),
previousIterArray(0), previousLeft(0),previousRight(0), previousSize(0) {
: ApplicationImplementation(inArgc,inArgv), tree(inTree) , kernels(0), iterArray(0),
previousIterArray(0), previousLeft(0),previousRight(0), previousSize(0),
MaxThreads(omp_get_max_threads()) {
assert(tree, "tree cannot be null", __LINE__, __FILE__);
// Validate the constructor argument (it is dereferenced below to copy-construct the per-thread kernels);
// the member 'kernels' is not what the caller passed and must not be the thing tested here.
assert(inKernels, "kernels cannot be null", __LINE__, __FILE__);
for(int idxThread = 0 ; idxThread < FThreadNumbers ; ++idxThread){
this->kernels = new Kernel*[MaxThreads];
for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
this->kernels[idxThread] = new KernelClass<ParticleClass, CellClass, OctreeHeight>(*inKernels);
}
......@@ -99,9 +99,10 @@ public:
/** Default destructor */
virtual ~FFmmAlgorithmThreadProc(){
for(int idxThread = 0 ; idxThread < FThreadNumbers ; ++idxThread){
for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
delete this->kernels[idxThread];
}
delete [] this->kernels;
}
/**
......@@ -125,11 +126,6 @@ public:
previousIterArray = new OctreeIterator[leafs];
assert(previousIterArray, "previousIterArray bad alloc", __LINE__, __FILE__);
// init kernels
for(int idxThread = 0 ; idxThread < FThreadNumbers ; ++idxThread){
this->kernels[idxThread]->init();
}
// init offsets
for(int idxOff = 0 ; idxOff < OctreeHeight ; ++idxOff){
leftOffsets[idxOff] = 0;
......@@ -154,6 +150,10 @@ public:
FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
}
/////////////////////////////////////////////////////////////////////////////
// Utils functions
/////////////////////////////////////////////////////////////////////////////
int getLeft(const int idProc, const int inSize, const int nbOfProc) const {
const float step = (float(inSize) / nbOfProc);
return int(FMath::Ceil(step * idProc));
......@@ -171,11 +171,15 @@ public:
return int(position/step);
}
/////////////////////////////////////////////////////////////////////////////
// P2M
/////////////////////////////////////////////////////////////////////////////
/** P2M */
void bottomPass(){
FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
FDEBUG( FDebug::Controller.write("\tStart Bottom Pass\n").write(FDebug::Flush) );
FDEBUG( counterTime.tic() );
FDEBUG(FTic counterTime);
OctreeIterator octreeIterator(tree);
const int nbProcess = processCount();
......@@ -196,11 +200,11 @@ public:
this->previousRight = endIdx - 1;
this->previousSize = leafs;
FDEBUG(computationCounter.tic());
#pragma omp parallel num_threads(FThreadNumbers)
FDEBUG(FTic computationCounter);
#pragma omp parallel
{
Kernel * const myThreadkernels = kernels[omp_get_thread_num()];
#pragma omp for
#pragma omp for
for(int idxLeafs = startIdx ; idxLeafs < endIdx ; ++idxLeafs){
// We need the current cell that represent the leaf
// and the list of particles
......@@ -214,18 +218,24 @@ public:
this->previousRight = endIdx - 1;
this->previousSize = leafs;
FDEBUG( counterTime.tac() );
FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" );
FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.tacAndElapsed() << "s)\n" );
FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.elapsed() << " s\n" );
FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
}
/////////////////////////////////////////////////////////////////////////////
// Upward
/////////////////////////////////////////////////////////////////////////////
/** M2M */
void upwardPass(){
FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
FDEBUG( FDebug::Controller.write("\tStart Upward Pass\n").write(FDebug::Flush); );
FDEBUG( counterTime.tic() );
FDEBUG( double totalComputation = 0 );
FDEBUG(FTic counterTime);
FDEBUG(FTic computationCounter);
FDEBUG(FTic sendCounter);
FDEBUG(FTic receiveCounter);
// Start from leaf level - 1
OctreeIterator octreeIterator(tree);
......@@ -238,7 +248,6 @@ public:
// for each levels
for(int idxLevel = OctreeHeight - 2 ; idxLevel > 1 ; --idxLevel ){
//print();
int leafs = 0;
// for each cells
......@@ -252,37 +261,32 @@ public:
const int startIdx = getLeft(idPorcess,leafs,nbProcess);
const int endIdx = getRight(idPorcess,leafs,nbProcess);
//std::cout << idPorcess << ">>--startIdx " << (startIdx) << " endIdx " << (endIdx) << std::endl;
//std::cout << idPorcess << ">>--previousLeft " << (previousLeft) << " previousRight " << (previousRight) << std::endl;
//std::cout << "level " << idxLevel << " start " << startIdx << " end " << endIdx << std::endl;
if(startIdx < leafs){
FDEBUG(sendCounter.tic());
int leftOffset = 0;
{
const MortonIndex MostLeftChild = iterArray[startIdx].getCurrentGlobalIndex() << 3;
const MortonIndex leftChildIter = previousIterArray[previousLeft].getCurrentGlobalIndex();
//std::cout << idPorcess << ">>--MostLeftChild " << (MostLeftChild) << " leftChildIter " << (leftChildIter) << std::endl;
if(leftChildIter < MostLeftChild){
int parentOffset = startIdx - 1;
MortonIndex parentIndex = iterArray[parentOffset].getCurrentGlobalIndex();
MortonIndex childIndex = 0;
while( (childIndex = previousIterArray[previousLeft+leftOffset].getCurrentGlobalIndex()) < MostLeftChild){
childIndex >>= 3;
//std::cout << "before loop" << std::endl;
while(childIndex != parentIndex){
if(childIndex < parentIndex) --parentOffset;
else ++parentOffset;
//std::cout << "parentOffset " << parentOffset << " parentIndex " << parentIndex << " childIndex " << childIndex << std::endl;
parentIndex = iterArray[parentOffset].getCurrentGlobalIndex();
//std::cout << "parentOffset " << parentOffset << " parentIndex " << parentIndex << " childIndex " << childIndex << std::endl;
}
//std::cout << "before send" << std::endl;
const int idxReceiver = getProc(parentOffset,leafs,nbProcess);
sendData(idxReceiver,sizeof(CellClass),previousIterArray[this->previousLeft+leftOffset].getCurrentCell(),previousLeft+leftOffset);
//std::cout << idPorcess << "\t>>-- sends left to " << (idxReceiver) << " index " << (previousLeft+leftOffset) << std::endl;
++leftOffset;
//std::cout << "before end big loop" << std::endl;
}
}
else if(this->previousLeft > 0 && leftChildIter > MostLeftChild){
......@@ -291,13 +295,12 @@ public:
}
}
}
//std::cout << idPorcess << ">>--leftOffset " << (leftOffset) << std::endl;
int rightOffset = 0;
{
const MortonIndex MostRightChild = (iterArray[endIdx-1].getCurrentGlobalIndex() << 3) | 7;
const MortonIndex rightChildIter = previousIterArray[previousRight].getCurrentGlobalIndex();
//std::cout << idPorcess << ">>--MostRightChild " << (MostRightChild) << " rightChildIter " << (rightChildIter) << std::endl;
if(this->previousRight < this->previousSize - 1 && rightChildIter < MostRightChild){
while( previousIterArray[previousRight-rightOffset+1].getCurrentGlobalIndex() <= MostRightChild){
--rightOffset;
......@@ -316,60 +319,55 @@ public:
}
const int idxReceiver = getProc(parentOffset,leafs,nbProcess);
sendData(idxReceiver,sizeof(CellClass),previousIterArray[this->previousRight-rightOffset].getCurrentCell(),previousRight-rightOffset);
//std::cout << idPorcess << "\t>>-- sends right to " << (idxReceiver) << " index " << (previousRight+rightOffset) << std::endl;
++rightOffset;