Commit 8964b252 authored by berenger-bramas's avatar berenger-bramas
Browse files

Created :

    Parallel version of Fmm Tsm (target or source model)
    +
    First shoot of the blas kernels (worked but net to be checked and cleaned)

git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/scalfmm/scalfmm/trunk@40 2616d619-271b-44dc-8df4-d4a8f33a7222
parent 2bd08125
#ifndef FFMMALGORITHMARRAYTSM_HPP
#define FFMMALGORITHMARRAYTSM_HPP
// /!\ Please, you must read the license at the bottom of this page
#include "../Utils/FAssertable.hpp"
#include "../Utils/FDebug.hpp"
#include "../Utils/FTrace.hpp"
#include "../Utils/FTic.hpp"
#include "../Utils/FGlobal.hpp"
#include "../Containers/FOctree.hpp"
#include <omp.h>
/**
* @author Berenger Bramas (berenger.bramas@inria.fr)
* @class FFmmAlgorithmArrayTsm
* @brief
* Please read the license
*
* This class is a threaded FMM algorithm
* It just iterates on a tree and call the kernels with good arguments.
* It used the inspector-executor model :
* iterates on the tree and builds an array to work in parallel on this array
*
* Of course this class does not deallocate pointer given in arguements.
*
* Threaded & based on the inspector-executor model
* schedule(runtime)
*/
template<template< class ParticuleClass, class CellClass, int OctreeHeight> class KernelClass,
class ParticuleClass, class CellClass,
template<class ParticuleClass> class LeafClass,
int OctreeHeight, int SubtreeHeight>
class FFmmAlgorithmArrayTsm : protected FAssertable{
// To reduce the size of variable type based on foctree in this file
typedef FOctree<ParticuleClass, CellClass, LeafClass, OctreeHeight, SubtreeHeight> Octree;
typedef typename FOctree<ParticuleClass, CellClass,LeafClass, OctreeHeight, SubtreeHeight>::Iterator OctreeIterator;
typedef KernelClass<ParticuleClass, CellClass, OctreeHeight> Kernel;
Octree* const tree; //< The octree to work on
Kernel* kernels[FThreadNumbers]; //< The kernels
FDEBUG(FTic counterTime); //< In case of debug: to count the elapsed time
FDEBUG(FTic computationCounter); //< In case of debug: to count computation time
OctreeIterator* iterArray;
public:
/** The constructor need the octree and the kernels used for computation
* @param inTree the octree to work on
* @param inKernels the kernels to call
* An assert is launched if one of the arguments is null
*/
FFmmAlgorithmArrayTsm(Octree* const inTree, Kernel* const inKernels)
: tree(inTree) , iterArray(0) {
assert(tree, "tree cannot be null", __LINE__, __FILE__);
assert(kernels, "kernels cannot be null", __LINE__, __FILE__);
for(int idxThread = 0 ; idxThread < FThreadNumbers ; ++idxThread){
this->kernels[idxThread] = new KernelClass<ParticuleClass, CellClass, OctreeHeight>(*inKernels);
}
FDEBUG(FDebug::Controller << "FFmmAlgorithmArrayTsm\n");
}
/** Default destructor */
virtual ~FFmmAlgorithmArrayTsm(){
for(int idxThread = 0 ; idxThread < FThreadNumbers ; ++idxThread){
delete this->kernels[idxThread];
}
}
/**
* To execute the fmm algorithm
* Call this function to run the complete algorithm
*/
void execute(){
FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
// Count leaf
int leafs = 0;
OctreeIterator octreeIterator(tree);
octreeIterator.gotoBottomLeft();
do{
++leafs;
} while(octreeIterator.moveRight());
iterArray = new OctreeIterator[leafs];
assert(iterArray, "iterArray bad alloc", __LINE__, __FILE__);
for(int idxThread = 0 ; idxThread < FThreadNumbers ; ++idxThread){
this->kernels[idxThread]->init();
}
bottomPass();
upwardPass();
downardPass();
directPass();
delete [] iterArray;
iterArray = 0;
FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
}
/** P2M */
void bottomPass(){
FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
FDEBUG( FDebug::Controller.write("\tStart Bottom Pass\n").write(FDebug::Flush) );
FDEBUG( counterTime.tic() );
OctreeIterator octreeIterator(tree);
int leafs = 0;
// Iterate on leafs
octreeIterator.gotoBottomLeft();
do{
iterArray[leafs] = octreeIterator;
++leafs;
} while(octreeIterator.moveRight());
FDEBUG(computationCounter.tic());
#pragma omp parallel num_threads(FThreadNumbers)
{
Kernel * const myThreadkernels = kernels[omp_get_thread_num()];
#pragma omp for
for(int idxLeafs = 0 ; idxLeafs < leafs ; ++idxLeafs){
// We need the current cell that represent the leaf
// and the list of particules
FList<ParticuleClass*>* const sources = iterArray[idxLeafs].getCurrentListSources();
if(sources->getSize()){
iterArray[idxLeafs].getCurrentCell()->setSourcesChildTrue();
myThreadkernels->P2M( iterArray[idxLeafs].getCurrentCell() , sources);
}
if(iterArray[idxLeafs].getCurrentListTargets()->getSize()){
iterArray[idxLeafs].getCurrentCell()->setTargetsChildTrue();
}
}
}
FDEBUG(computationCounter.tac());
FDEBUG( counterTime.tac() );
FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" );
FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.elapsed() << " s\n" );
FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
}
/** M2M */
void upwardPass(){
FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
FDEBUG( FDebug::Controller.write("\tStart Upward Pass\n").write(FDebug::Flush); );
FDEBUG( counterTime.tic() );
FDEBUG( double totalComputation = 0 );
// Start from leal level - 1
OctreeIterator octreeIterator(tree);
octreeIterator.gotoBottomLeft();
octreeIterator.moveUp();
OctreeIterator avoidGotoLeftIterator(octreeIterator);
// for each levels
for(int idxLevel = OctreeHeight - 2 ; idxLevel > 1 ; --idxLevel ){
int leafs = 0;
// for each cells
do{
iterArray[leafs] = octreeIterator;
++leafs;
} while(octreeIterator.moveRight());
avoidGotoLeftIterator.moveUp();
octreeIterator = avoidGotoLeftIterator;// equal octreeIterator.moveUp(); octreeIterator.gotoLeft();
FDEBUG(computationCounter.tic());
#pragma omp parallel num_threads(FThreadNumbers)
{
Kernel * const myThreadkernels = kernels[omp_get_thread_num()];
#pragma omp for
for(int idxLeafs = 0 ; idxLeafs < leafs ; ++idxLeafs){
// We need the current cell and the child
// child is an array (of 8 child) that may be null
CellClass* potentialChild[8];
CellClass** const realChild = iterArray[idxLeafs].getCurrentChild();
CellClass* const currentCell = iterArray[idxLeafs].getCurrentCell();
for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){
potentialChild[idxChild] = 0;
if(realChild[idxChild]){
if(realChild[idxChild]->hasSourcesChild()){
currentCell->setSourcesChildTrue();
potentialChild[idxChild] = realChild[idxChild];
}
if(realChild[idxChild]->hasTargetsChild()){
currentCell->setTargetsChildTrue();
}
}
}
myThreadkernels->M2M( currentCell , potentialChild, idxLevel);
}
}
FDEBUG(computationCounter.tac());
FDEBUG(totalComputation += computationCounter.elapsed());
}
FDEBUG( counterTime.tac() );
FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" );
FDEBUG( FDebug::Controller << "\t\t Computation : " << totalComputation << " s\n" );
FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
}
/** M2L L2L */
void downardPass(){
FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
FDEBUG( FDebug::Controller.write("\tStart Downward Pass (M2L)\n").write(FDebug::Flush); );
FDEBUG( counterTime.tic() );
FDEBUG( double totalComputation = 0 );
{ // first M2L
OctreeIterator octreeIterator(tree);
octreeIterator.moveDown();
OctreeIterator avoidGotoLeftIterator(octreeIterator);
// for each levels
for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
int leafs = 0;
// for each cells
do{
iterArray[leafs] = octreeIterator;
++leafs;
} while(octreeIterator.moveRight());
avoidGotoLeftIterator.moveDown();
octreeIterator = avoidGotoLeftIterator;
FDEBUG(computationCounter.tic());
#pragma omp parallel num_threads(FThreadNumbers)
{
Kernel * const myThreadkernels = kernels[omp_get_thread_num()];
CellClass* neighbors[208];
#pragma omp for
for(int idxLeafs = 0 ; idxLeafs < leafs ; ++idxLeafs){
CellClass* const currentCell = iterArray[idxLeafs].getCurrentCell();
if(currentCell->hasTargetsChild()){
const int counter = tree->getDistantNeighbors(neighbors, iterArray[idxLeafs].getCurrentGlobalIndex(),idxLevel);
int offsetTargetNeighbors = 0;
for(int idxRealNeighbors = 0 ; idxRealNeighbors < counter ; ++idxRealNeighbors, ++offsetTargetNeighbors){
if(neighbors[idxRealNeighbors]->hasSourcesChild()){
if(idxRealNeighbors != offsetTargetNeighbors){
neighbors[offsetTargetNeighbors] = neighbors[idxRealNeighbors];
}
}
else{
--offsetTargetNeighbors;
}
}
if(offsetTargetNeighbors){
myThreadkernels->M2L( currentCell , neighbors, offsetTargetNeighbors, idxLevel);
}
}
}
}
FDEBUG(computationCounter.tac());
FDEBUG(totalComputation += computationCounter.elapsed());
}
}
FDEBUG( counterTime.tac() );
FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" );
FDEBUG( FDebug::Controller << "\t\t Computation : " << totalComputation << " s\n" );
FDEBUG( FDebug::Controller.write("\tStart Downward Pass (L2L)\n").write(FDebug::Flush); );
FDEBUG( counterTime.tic() );
FDEBUG( totalComputation = 0 );
{ // second L2L
OctreeIterator octreeIterator(tree);
octreeIterator.moveDown();
OctreeIterator avoidGotoLeftIterator(octreeIterator);
const int heightMinusOne = OctreeHeight - 1;
// for each levels exepted leaf level
for(int idxLevel = 2 ; idxLevel < heightMinusOne ; ++idxLevel ){
int leafs = 0;
// for each cells
do{
iterArray[leafs] = octreeIterator;
++leafs;
} while(octreeIterator.moveRight());
avoidGotoLeftIterator.moveDown();
octreeIterator = avoidGotoLeftIterator;
FDEBUG(computationCounter.tic());
#pragma omp parallel num_threads(FThreadNumbers)
{
Kernel * const myThreadkernels = kernels[omp_get_thread_num()];
#pragma omp for
for(int idxLeafs = 0 ; idxLeafs < leafs ; ++idxLeafs){
CellClass* potentialChild[8];
CellClass** const realChild = iterArray[idxLeafs].getCurrentChild();
CellClass* const currentCell = iterArray[idxLeafs].getCurrentCell();
for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){
if(realChild[idxChild] && realChild[idxChild]->hasTargetsChild()){
potentialChild[idxChild] = realChild[idxChild];
}
else{
potentialChild[idxChild] = 0;
}
}
myThreadkernels->L2L( currentCell , potentialChild, idxLevel);
}
}
FDEBUG(computationCounter.tac());
FDEBUG(totalComputation += computationCounter.elapsed());
}
}
FDEBUG( counterTime.tac() );
FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" );
FDEBUG( FDebug::Controller << "\t\t Computation : " << totalComputation << " s\n" );
FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
}
/** P2P */
void directPass(){
FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
FDEBUG( FDebug::Controller.write("\tStart Direct Pass\n").write(FDebug::Flush); );
FDEBUG( counterTime.tic() );
int leafs = 0;
{
OctreeIterator octreeIterator(tree);
octreeIterator.gotoBottomLeft();
// for each leafs
do{
iterArray[leafs] = octreeIterator;
++leafs;
} while(octreeIterator.moveRight());
}
const int heightMinusOne = OctreeHeight - 1;
FDEBUG(computationCounter.tic());
#pragma omp parallel num_threads(FThreadNumbers)
{
Kernel * const myThreadkernels = kernels[omp_get_thread_num()];
// There is a maximum of 26 neighbors
FList<ParticuleClass*>* neighbors[26];
#pragma omp for
for(int idxLeafs = 0 ; idxLeafs < leafs ; ++idxLeafs){
myThreadkernels->L2P(iterArray[idxLeafs].getCurrentCell(), iterArray[idxLeafs].getCurrentListTargets());
// need the current particules and neighbors particules
const int counter = tree->getLeafsNeighbors(neighbors, iterArray[idxLeafs].getCurrentGlobalIndex(),heightMinusOne);
myThreadkernels->P2P( iterArray[idxLeafs].getCurrentListTargets(), iterArray[idxLeafs].getCurrentListSources() , neighbors, counter);
}
}
FDEBUG(computationCounter.tac());
FDEBUG( counterTime.tac() );
FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" );
FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.elapsed() << " s\n" );
FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
}
};
#endif //FFMMALGORITHMARRAYTSM_HPP
// [--LICENSE--]
......@@ -22,10 +22,10 @@
* Of course this class does not deallocate pointer given in arguements.
*/
template<template< class ParticuleClass, class CellClass, int OctreeHeight> class KernelClass,
class ParticuleClass, class CellClass,
template<class ParticuleClass> class LeafClass,
int OctreeHeight, int SubtreeHeight>
class FFmmAlgorithmTsm : protected FAssertable{
class ParticuleClass, class CellClass,
template<class ParticuleClass> class LeafClass,
int OctreeHeight, int SubtreeHeight>
class FFmmAlgorithmTsm : protected FAssertable{
// To reduce the size of variable type based on foctree in this file
typedef FOctree<ParticuleClass, CellClass, LeafClass, OctreeHeight, SubtreeHeight> Octree;
typedef typename FOctree<ParticuleClass, CellClass,LeafClass, OctreeHeight, SubtreeHeight>::Iterator FOctreeIterator;
......@@ -33,17 +33,17 @@ class FFmmAlgorithmTsm : protected FAssertable{
Octree* const tree; //< The octree to work on
KernelClass<ParticuleClass, CellClass, OctreeHeight>* const kernels; //< The kernels
FDEBUG(FTic counterTime); //< In case of debug: to count the elapsed time
FDEBUG(FTic computationCounter); //< In case of debug: to count computation time
FDEBUG(FTic counterTime); //< In case of debug: to count the elapsed time
FDEBUG(FTic computationCounter); //< In case of debug: to count computation time
public:
public:
/** The constructor need the octree and the kernels used for computation
* @param inTree the octree to work on
* @param inKernels the kernels to call
* An assert is launched if one of the arguments is null
*/
FFmmAlgorithmTsm(Octree* const inTree, KernelClass<ParticuleClass,CellClass,OctreeHeight>* const inKernels)
: tree(inTree) , kernels(inKernels) {
: tree(inTree) , kernels(inKernels) {
assert(tree, "tree cannot be null", __LINE__, __FILE__);
assert(kernels, "kernels cannot be null", __LINE__, __FILE__);
......
This diff is collapsed.
......@@ -12,12 +12,12 @@
* Propose basic complexe class.
*/
class FComplexe {
FReal imag; //< Imaginary
FReal real; //< Real
FReal imag; //< Imaginary
public:
/** Default Constructor (set real&imaginary to 0) */
FComplexe() : imag(0), real(0){
FComplexe() : real(0),imag(0){
}
/** Constructor with values
......@@ -25,12 +25,12 @@ public:
* @param inReal the real
*/
FComplexe(const FReal inImag, const FReal inReal)
: imag(inImag), real(inReal){
: real(inReal),imag(inImag){
}
/** Copy constructor */
FComplexe(const FComplexe& other)
: imag(other.imag), real(other.real){
: real(other.real), imag(other.imag){
}
/** Copy operator */
......
......@@ -41,7 +41,7 @@ static const int FThreadNumbers = 4;
// Types
///////////////////////////////////////////////////////
typedef float FReal;
typedef double FReal;
///////////////////////////////////////////////////////
// Restrict
......
......@@ -13,9 +13,9 @@
* Propose basic math functions or indirections to std math.
*/
struct FMath{
static const FReal FPi; //< Pi constant
static const FReal FPiDiv2; //< Pi/2 constant
static const FReal Epsilon; //< Epsilon
static const double FPi; //< Pi constant
static const double FPiDiv2; //< Pi/2 constant
static const double Epsilon; //< Epsilon
/** To get absolute value */
template <class NumType>
......@@ -94,9 +94,9 @@ struct FMath{
}
};
const FReal FMath::FPi = M_PI;
const FReal FMath::FPiDiv2 = M_PI_2;
const FReal FMath::Epsilon = 0.00000000000000000001;
const double FMath::FPi = M_PI;
const double FMath::FPiDiv2 = M_PI_2;
const double FMath::Epsilon = 0.00000000000000000001;
#endif //FMATH_HPP
......
......@@ -15,11 +15,11 @@
// Compile by mpic++ testApplication.cpp ../Sources/Utils/FAssertable.cpp -o testApplication.exe
// run by mpirun -np 4 ./testApplication.exe
#include "../Sources/Utils/FMpiApplication.hpp"
#define ApplicationImplementation FMpiApplication
typedef FMpiApplication ApplicationImplementation;
#else
// Compile by g++ testApplication.cpp ../Sources/Utils/FAssertable.cpp -o testApplication.exe
#include "../Sources/Utils/FSingleApplication.hpp"
#define ApplicationImplementation FSingleApplication
typedef FSingleApplication ApplicationImplementation;
#endif
//================================================================================================
......
// /!\ Please, you must read the license at the bottom of this page
#include <iostream>
#include <stdio.h>
#include <stdlib.h>
#include "../Sources/Utils/FTic.hpp"
#include "../Sources/Containers/FOctree.hpp"
#include "../Sources/Containers/FList.hpp"
#include "../Sources/Components/FFmaParticule.hpp"
#include "../Sources/Extenssions/FExtendForces.hpp"
#include "../Sources/Extenssions/FExtendPotential.hpp"
#include "../Sources/Components/FBasicCell.hpp"
#include "../Sources/Fmb/FExtendFmbCell.hpp"
#include "../Sources/Core/FFmmAlgorithm.hpp"
#include "../Sources/Core/FFmmAlgorithmArray.hpp"
#include "../Sources/Core/FFmmAlgorithmThreaded.hpp"
#include "../Sources/Core/FFmmAlgorithmTask.hpp"
#include "../Sources/Components/FSimpleLeaf.hpp"
#include "../Sources/Fmb/FFmbKernelsBlas.hpp"
#include "../Sources/Files/FFMALoader.hpp"
// With openmp : g++ testFmbBlasAlgorithm.cpp ../Sources/Utils/FAssertable.cpp ../Sources/Utils/FDebug.cpp ../Sources/Utils/FTrace.cpp -lgomp -fopenmp -lblas -O2 -o testFmbBlasAlgorithm.exe
// icpc -openmp -openmp-lib=compat testFmbAlgorithm.cpp ../Sources/Utils/FAssertable.cpp ../Sources/Utils/FDebug.cpp -O2 -o testFmbAlgorithm.exe
/** This program show an example of use of
* the fmm basic algo
* it also check that eachh particules is little or longer
* related that each other
*/
/** Fmb class has to extend {FExtendForces,FExtendPotential,FExtendPhysicalValue}
* Because we use fma loader it needs {FFmaParticule}
*/
class FmbParticule : public FFmaParticule, public FExtendForces, public FExtendPotential {
public:
};
/** Custom cell
*
*/
class FmbCell : public FBasicCell, public FExtendFmbCell {
public:
};
// Simply create particules and try the kernels
int main(int argc, char ** argv){
///////////////////////What we do/////////////////////////////
std::cout << ">> This executable has to be used to test fmb algorithm.\n";
//////////////////////////////////////////////////////////////
const int NbLevels = 9;//10;
const int SizeSubLevels = 3;//3
FTic counter;
const char* const defaultFilename = "testLoaderFMA.fma"; //../../Data/ "testLoaderFMA.fma" "testFMAlgorithm.fma" Sphere.fma
const char* filename;
if(argc == 1){
std::cout << "You have to give a .fma file in argument.\n";
std::cout << "The program will try a default file : " << defaultFilename << "\n";
filename = defaultFilename;
}
else{
filename = argv[1];
std::cout << "Opening : " << filename << "\n";
}
FFMALoader<FmbParticule> loader(filename);
if(!loader.isValide()){
std::cout << "Loader Error, " << filename << " is missing\n";
return 1;
}
// -----------------------------------------------------
FOctree<FmbParticule, FmbCell, FSimpleLeaf, NbLevels, SizeSubLevels> tree(loader.getBoxWidth(),loader.getCenterOfBox());
// -----------------------------------------------------
std::cout << "Creating " << loader.getNumberOfParticules() << " particules ..." << std::endl;
counter.tic();
FmbParticule* particules = new FmbParticule[loader.getNumberOfParticules()];
for(int idxPart = 0 ; idxPart < loader.getNumberOfParticules() ; ++idxPart){
loader.fillParticule(&particules[idxPart]);
}
counter.tac();
std::cout << "Done " << "(" << counter.elapsed() << "s)." << std::endl;
// -----------------------------------------------------
std::cout << "Inserting particules ..." << std::endl;
counter.tic();
for(long idxPart = 0 ; idxPart < loader.getNumberOfParticules() ; ++idxPart){
tree.insert(&particules[idxPart]);
}
counter.tac();
std::cout << "Done " << "(" << counter.elapsed() << "s)." << std::endl;
// -----------------------------------------------------