Commit 69deecb3 authored by berenger-bramas's avatar berenger-bramas
Browse files

Prog to create spherical FMA file.

New multi-thread version.
(efficiency > 0.8 for 8 threads on 2.000.000 particules)

git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/scalfmm/scalfmm/trunk@25 2616d619-271b-44dc-8df4-d4a8f33a7222
parent 987cee7d
......@@ -150,8 +150,8 @@ public:
for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
// for each cells
do{
const int counter = tree->getDistantNeighbors(neighbors, octreeIterator.getCurrentGlobalIndex(),idxLevel);
FDEBUG(computationCounter.tic());
const int counter = tree->getDistantNeighbors(neighbors, octreeIterator.getCurrentGlobalIndex(),idxLevel);
if(counter) kernels->M2L( octreeIterator.getCurrentCell() , neighbors, counter, idxLevel);
FDEBUG(computationCounter.tac());
FDEBUG(totalComputation += computationCounter.elapsed());
......@@ -213,11 +213,8 @@ public:
do{
FDEBUG(computationCounter.tic());
kernels->L2P(octreeIterator.getCurrentCell(), octreeIterator.getCurrentList());
FDEBUG(computationCounter.tac());
FDEBUG(totalComputation += computationCounter.elapsed());
// need the current particules and neighbors particules
const int counter = tree->getLeafsNeighbors(neighbors, octreeIterator.getCurrentGlobalIndex(),heightMinusOne);
FDEBUG(computationCounter.tic());
kernels->P2P( octreeIterator.getCurrentList() , neighbors, counter);
FDEBUG(computationCounter.tac());
FDEBUG(totalComputation += computationCounter.elapsed());
......
#ifndef FFMMALGORITHMARRAY_HPP
#define FFMMALGORITHMARRAY_HPP
// /!\ Please, you must read the license at the bottom of this page
#include "../Utils/FAssertable.hpp"
#include "../Utils/FDebug.hpp"
#include "../Utils/FTrace.hpp"
#include "../Utils/FTic.hpp"
#include "../Containers/FOctree.hpp"
#include <omp.h>
/**
* @author Berenger Bramas (berenger.bramas@inria.fr)
* @class FFMMAlgorithmArray
* @brief
* Please read the license
*
* This class is a basic FMM algorithm
* It just iterates on a tree and call the kernels with good arguments.
*
* Of course this class does not deallocate pointer given in arguements.
*
* schedule(runtime)
*/
template<template< class ParticuleClass, class CellClass, int OctreeHeight> class KernelClass, class ParticuleClass, class CellClass, int OctreeHeight, int SubtreeHeight>
class FFMMAlgorithmArray : protected FAssertable{
// To reduce the size of variable type based on foctree in this file
typedef FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight> Octree;
typedef typename FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>::Iterator OctreeIterator;
typedef KernelClass<ParticuleClass, CellClass, OctreeHeight> Kernel;
static const int NbThreads = 4; //< Number of threads (currently a static number)
Octree* const tree; //< The octree to work on
Kernel* kernels[NbThreads]; //< The kernels
FDEBUG(FTic counter); //< In case of debug: to count the elapsed time
FDEBUG(FTic computationCounter); //< In case of debug: to count computation time
OctreeIterator* iterArray;
public:
/** The constructor need the octree and the kernels used for computation
* @param inTree the octree to work on
* @param inKernels the kernels to call
* An assert is launched if one of the arguments is null
*/
FFMMAlgorithmArray(Octree* const inTree, Kernel* const inKernels)
: tree(inTree) , iterArray(0) {
assert(tree, "tree cannot be null", __LINE__, __FILE__);
assert(kernels, "kernels cannot be null", __LINE__, __FILE__);
for(int idxThread = 0 ; idxThread < NbThreads ; ++idxThread){
this->kernels[idxThread] = new KernelClass<ParticuleClass, CellClass, OctreeHeight>(*inKernels);
}
FDEBUG(FDebug::Controller << "FFMMAlgorithmArray\n");
}
/** Default destructor */
virtual ~FFMMAlgorithmArray(){
for(int idxThread = 0 ; idxThread < NbThreads ; ++idxThread){
delete this->kernels[idxThread];
}
}
/**
* To execute the fmm algorithm
* Call this function to run the complete algorithm
*/
void execute(){
FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
// Count leaf
int leafs = 0;
OctreeIterator octreeIterator(tree);
octreeIterator.gotoBottomLeft();
do{
++leafs;
} while(octreeIterator.moveRight());
iterArray = new OctreeIterator[leafs];
assert(iterArray, "iterArray bad alloc", __LINE__, __FILE__);
for(int idxThread = 0 ; idxThread < NbThreads ; ++idxThread){
this->kernels[idxThread]->init();
}
bottomPass();
upwardPass();
downardPass();
directPass();
delete [] iterArray;
iterArray = 0;
FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
}
/** P2M */
void bottomPass(){
FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
FDEBUG( FDebug::Controller.write("\tStart Bottom Pass\n").write(FDebug::Flush) );
FDEBUG( counter.tic() );
OctreeIterator octreeIterator(tree);
int leafs = 0;
// Iterate on leafs
octreeIterator.gotoBottomLeft();
do{
iterArray[leafs] = octreeIterator;
++leafs;
} while(octreeIterator.moveRight());
FDEBUG(computationCounter.tic());
#pragma omp parallel num_threads(NbThreads)
{
Kernel * const myThreadkernels = kernels[omp_get_thread_num()];
#pragma omp for
for(int idxLeafs = 0 ; idxLeafs < leafs ; ++idxLeafs){
// We need the current cell that represent the leaf
// and the list of particules
myThreadkernels->P2M( iterArray[idxLeafs].getCurrentCell() , iterArray[idxLeafs].getCurrentList());
}
}
FDEBUG(computationCounter.tac());
FDEBUG( counter.tac() );
FDEBUG( FDebug::Controller << "\tFinished (" << counter.elapsed() << "s)\n" );
FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.elapsed() << " s\n" );
FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
}
/** M2M */
void upwardPass(){
FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
FDEBUG( FDebug::Controller.write("\tStart Upward Pass\n").write(FDebug::Flush); );
FDEBUG( counter.tic() );
FDEBUG( double totalComputation = 0 );
// Start from leal level - 1
OctreeIterator octreeIterator(tree);
octreeIterator.gotoBottomLeft();
octreeIterator.moveUp();
OctreeIterator avoidGotoLeftIterator(octreeIterator);
// for each levels
for(int idxLevel = OctreeHeight - 2 ; idxLevel > 1 ; --idxLevel ){
int leafs = 0;
// for each cells
do{
iterArray[leafs] = octreeIterator;
++leafs;
} while(octreeIterator.moveRight());
avoidGotoLeftIterator.moveUp();
octreeIterator = avoidGotoLeftIterator;// equal octreeIterator.moveUp(); octreeIterator.gotoLeft();
FDEBUG(computationCounter.tic());
#pragma omp parallel num_threads(NbThreads)
{
Kernel * const myThreadkernels = kernels[omp_get_thread_num()];
#pragma omp for
for(int idxLeafs = 0 ; idxLeafs < leafs ; ++idxLeafs){
// We need the current cell and the child
// child is an array (of 8 child) that may be null
myThreadkernels->M2M( iterArray[idxLeafs].getCurrentCell() , iterArray[idxLeafs].getCurrentChild(), idxLevel);
}
}
FDEBUG(computationCounter.tac());
FDEBUG(totalComputation += computationCounter.elapsed());
}
FDEBUG( counter.tac() );
FDEBUG( FDebug::Controller << "\tFinished (" << counter.elapsed() << "s)\n" );
FDEBUG( FDebug::Controller << "\t\t Computation : " << totalComputation << " s\n" );
FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
}
/** M2L L2L */
void downardPass(){
FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
FDEBUG( FDebug::Controller.write("\tStart Downward Pass (M2L)\n").write(FDebug::Flush); );
FDEBUG( counter.tic() );
FDEBUG( double totalComputation = 0 );
{ // first M2L
OctreeIterator octreeIterator(tree);
octreeIterator.moveDown();
OctreeIterator avoidGotoLeftIterator(octreeIterator);
// for each levels
for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
int leafs = 0;
// for each cells
do{
iterArray[leafs] = octreeIterator;
++leafs;
} while(octreeIterator.moveRight());
avoidGotoLeftIterator.moveDown();
octreeIterator = avoidGotoLeftIterator;
FDEBUG(computationCounter.tic());
#pragma omp parallel num_threads(NbThreads)
{
Kernel * const myThreadkernels = kernels[omp_get_thread_num()];
CellClass* neighbors[208];
#pragma omp for
for(int idxLeafs = 0 ; idxLeafs < leafs ; ++idxLeafs){
const int counter = tree->getDistantNeighbors(neighbors, iterArray[idxLeafs].getCurrentGlobalIndex(),idxLevel);
if(counter) myThreadkernels->M2L( iterArray[idxLeafs].getCurrentCell() , neighbors, counter, idxLevel);
}
}
FDEBUG(computationCounter.tac());
FDEBUG(totalComputation += computationCounter.elapsed());
}
}
FDEBUG( counter.tac() );
FDEBUG( FDebug::Controller << "\tFinished (" << counter.elapsed() << "s)\n" );
FDEBUG( FDebug::Controller << "\t\t Computation : " << totalComputation << " s\n" );
FDEBUG( FDebug::Controller.write("\tStart Downward Pass (L2L)\n").write(FDebug::Flush); );
FDEBUG( counter.tic() );
FDEBUG( totalComputation = 0 );
{ // second L2L
OctreeIterator octreeIterator(tree);
octreeIterator.moveDown();
OctreeIterator avoidGotoLeftIterator(octreeIterator);
const int heightMinusOne = OctreeHeight - 1;
// for each levels exepted leaf level
for(int idxLevel = 2 ; idxLevel < heightMinusOne ; ++idxLevel ){
int leafs = 0;
// for each cells
do{
iterArray[leafs] = octreeIterator;
++leafs;
} while(octreeIterator.moveRight());
avoidGotoLeftIterator.moveDown();
octreeIterator = avoidGotoLeftIterator;
FDEBUG(computationCounter.tic());
#pragma omp parallel num_threads(NbThreads)
{
Kernel * const myThreadkernels = kernels[omp_get_thread_num()];
#pragma omp for
for(int idxLeafs = 0 ; idxLeafs < leafs ; ++idxLeafs){
myThreadkernels->L2L( iterArray[idxLeafs].getCurrentCell() , iterArray[idxLeafs].getCurrentChild(), idxLevel);
}
}
FDEBUG(computationCounter.tac());
FDEBUG(totalComputation += computationCounter.elapsed());
}
}
FDEBUG( counter.tac() );
FDEBUG( FDebug::Controller << "\tFinished (" << counter.elapsed() << "s)\n" );
FDEBUG( FDebug::Controller << "\t\t Computation : " << totalComputation << " s\n" );
FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
}
/** P2P */
void directPass(){
FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) );
FDEBUG( FDebug::Controller.write("\tStart Direct Pass\n").write(FDebug::Flush); );
FDEBUG( counter.tic() );
int leafs = 0;
{
OctreeIterator octreeIterator(tree);
octreeIterator.gotoBottomLeft();
// for each leafs
do{
iterArray[leafs] = octreeIterator;
++leafs;
} while(octreeIterator.moveRight());
}
const int heightMinusOne = OctreeHeight - 1;
FDEBUG(computationCounter.tic());
#pragma omp parallel num_threads(NbThreads)
{
Kernel * const myThreadkernels = kernels[omp_get_thread_num()];
// There is a maximum of 26 neighbors
FList<ParticuleClass*>* neighbors[26];
#pragma omp for
for(int idxLeafs = 0 ; idxLeafs < leafs ; ++idxLeafs){
myThreadkernels->L2P(iterArray[idxLeafs].getCurrentCell(), iterArray[idxLeafs].getCurrentList());
// need the current particules and neighbors particules
const int counter = tree->getLeafsNeighbors(neighbors, iterArray[idxLeafs].getCurrentGlobalIndex(),heightMinusOne);
myThreadkernels->P2P( iterArray[idxLeafs].getCurrentList() , neighbors, counter);
}
}
FDEBUG(computationCounter.tac());
FDEBUG( counter.tac() );
FDEBUG( FDebug::Controller << "\tFinished (" << counter.elapsed() << "s)\n" );
FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.elapsed() << " s\n" );
FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
}
};
#endif //FFMMALGORITHMARRAY_HPP
// [--LICENSE--]
......@@ -17,6 +17,7 @@
#include "../Sources/Core/FTestKernels.hpp"
#include "../Sources/Core/FFMMAlgorithm.hpp"
#include "../Sources/Core/FFMMAlgorithmArray.hpp"
#include "../Sources/Core/FFMMAlgorithmThreaded.hpp"
// Compile by : g++ testFMMAlgorithm.cpp ../Sources/Utils/FAssertable.cpp ../Sources/Utils/FDebug.cpp ../Sources/Utils/FTrace.cpp -lgomp -fopenmp -O2 -o testFMMAlgorithm.exe
......@@ -73,8 +74,8 @@ int main(int , char ** ){
// FTestKernels FBasicKernels
FTestKernels<FTestParticule, FTestCell, NbLevels> kernels;
//FFMMAlgorithm FFMMAlgorithmThreaded
FFMMAlgorithmThreaded<FTestKernels, FTestParticule, FTestCell, NbLevels, SizeSubLevels> algo(&tree,&kernels);
//FFMMAlgorithm FFMMAlgorithmThreaded FFMMAlgorithmArray
FFMMAlgorithmArray<FTestKernels, FTestParticule, FTestCell, NbLevels, SizeSubLevels> algo(&tree,&kernels);
algo.execute();
counter.tac();
......
......@@ -18,6 +18,7 @@
#include "../Sources/Fmb/FExtendFmbCell.hpp"
#include "../Sources/Core/FFMMAlgorithm.hpp"
#include "../Sources/Core/FFMMAlgorithmArray.hpp"
#include "../Sources/Core/FFMMAlgorithmThreaded.hpp"
#include "../Sources/Fmb/FFmbKernelsPotentialForces.hpp"
......@@ -55,7 +56,7 @@ int main(int , char ** ){
const int NbLevels = 9;//10;
const int SizeSubLevels = 3;//3
FTic counter;
const char* const filename = "testLoaderFMA.fma"; //"testLoaderFMA.fma" "testFMAlgorithm.fma"
const char* const filename = "testLoaderFMA.fma"; //"testLoaderFMA.fma" "testFMAlgorithm.fma" Sphere.fma
FFMALoader<FmbParticule> loader(filename);
if(!loader.isValide()){
......@@ -98,7 +99,7 @@ int main(int , char ** ){
//FFmbKernelsPotentialForces FFmbKernelsForces FFmbKernelsPotential
FFmbKernelsPotentialForces<FmbParticule, FmbCell, NbLevels> kernels(NbLevels,loader.getBoxWidth());
//FFMMAlgorithm FFMMAlgorithmThreaded
//FFMMAlgorithm FFMMAlgorithmThreaded FFMMAlgorithmArray
FFMMAlgorithm<FFmbKernelsPotentialForces, FmbParticule, FmbCell, NbLevels, SizeSubLevels> algo(&tree,&kernels);
algo.execute();
......
// /!\ Please, you must read the license at the bottom of this page
#include <iostream>
#include <fstream>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <cmath>
// This file can generate basic particules files to load with basic loader
// g++ testLoaderCreateSphere.cpp -O2 -o testLoaderCreateSphere.exe
int main(int , char ** ){
// Nb of particules
const long NbParticules = 2000000;
// Center of the box
const float XCenter = 0.5;
const float YCenter = 0.5;
const float ZCenter = 0.5;
// Box width
const float BoxWidth = 1.0;
// Output file please let .temp extension
const char * const Output = "Sphere.fma";
// Create file
std::ofstream myfile;
myfile.open (Output);
if(!myfile.is_open()){
std::cout << "Cannot create " << Output << "\n";
return 1;
}
std::cout << "Creating " << NbParticules << " particules at " << Output << "\n";
std::cout << "Working...\n";
// System properties
myfile << NbParticules << " " << BoxWidth << " " << XCenter << " " << YCenter << " " << ZCenter;
const float rayon = 0.4;
const float thresh = 0.2;
// Generate particules
for( long idx = 0 ; idx < NbParticules ; ++idx ){
const float phi = ((float(rand())/RAND_MAX) * thresh - (thresh/2)) + rayon;
const float theta = M_PI * (float(rand())/RAND_MAX);
const float omega = 2 * M_PI * (float(rand())/RAND_MAX);
const float px = phi*cos(omega)*sin(theta) + XCenter;
const float py = phi*sin(omega)*cos(theta) + YCenter;
const float pz = phi*cos(theta) + ZCenter;
myfile << " \n" << px << " " << py << " " << pz << " 0.01";
}
myfile.close();
std::cout << "Done\n";
return 0;
}
// [--LICENSE--]
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment