Commit c5b84f83 authored by PIACIBELLO Cyrille's avatar PIACIBELLO Cyrille

Chebyshev and Uniform kernels can be used now with distributed algorithm. 2...

Chebyshev and Uniform kernels can be used now with distributed algorithm. 2 test files have been added to use these kernels with FmmAlgorithmThreadProc
parent 57d76530
......@@ -75,6 +75,10 @@ public:
buffer >> dataDown >> dataUp;
}
static int GetSize(){
return sizeof(long long int)*2;
}
/////////////////////////////////////////////////
/** Serialize only up data in a buffer */
......
......@@ -62,7 +62,7 @@
template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass>
class FFmmAlgorithmThreadProc : public FAbstractAlgorithm {
static const int MaxSizePerCell = 1024;
const int MaxSizePerCell = CellClass::GetSize();
OctreeClass* const tree; //< The octree to work on
KernelClass** kernels; //< The kernels
......@@ -191,7 +191,7 @@ public:
currentLimit >>= 3;
}
}
printf("Proc::%d From leaf %lld to leaf %lld\n",idProcess,myLastInterval.min,myLastInterval.max);
// We get the min/max indexes from each procs
FMpi::MpiAssert( MPI_Allgather( myIntervals, int(sizeof(Interval)) * OctreeHeight, MPI_BYTE,
workingIntervalsPerLevel, int(sizeof(Interval)) * OctreeHeight, MPI_BYTE, comm.getComm()), __LINE__ );
......@@ -325,43 +325,44 @@ private:
if( child[idxChild] && getWorkingInterval((idxLevel+1), idProcess).min <= child[idxChild]->getMortonIndex() ){
child[idxChild]->serializeUp(sendBuffer);
state = char(state | (0x1 << idxChild));
}
}
sendBuffer.writeAt(0,state);
while( sendToProc && iterArray[cellsToSend].getCurrentGlobalIndex() == getWorkingInterval(idxLevel , sendToProc - 1).max){
while( sendToProc && iterArray[cellsToSend].getCurrentGlobalIndex() <= getWorkingInterval(idxLevel , sendToProc - 1).max){
--sendToProc;
}
MPI_Isend(sendBuffer.data(), sendBuffer.getSize(), MPI_PACKED, sendToProc,
FMpi::TagFmmM2M, comm.getComm(), &requests[iterRequests++]);
}
// We may need to receive something
bool hasToReceive = false;
int endProcThatSend = firstProcThatSend;
if(idProcess != nbProcess - 1){ // if I'm the last one (idProcess == nbProcess-1), I shall not receive anything in a M2M
while(firstProcThatSend < nbProcess
&& (getWorkingInterval((idxLevel+1), firstProcThatSend).max) < (getWorkingInterval((idxLevel+1), idProcess).max)){
&& (getWorkingInterval((idxLevel+1), firstProcThatSend).max) <= (getWorkingInterval((idxLevel+1), idProcess).max)){
// Second condition :: while firstProcThatSend max morton index is < to myself max interval
++firstProcThatSend;
}
if(firstProcThatSend < nbProcess &&
(getWorkingInterval((idxLevel+1), firstProcThatSend).min >>3) == (getWorkingInterval((idxLevel+1) , idProcess).max>>3) ){
(getWorkingInterval((idxLevel+1), firstProcThatSend).min >>3) <= (getWorkingInterval((idxLevel+1) , idProcess).max>>3) ){
endProcThatSend = firstProcThatSend;
while( endProcThatSend < nbProcess &&
(getWorkingInterval((idxLevel+1) ,endProcThatSend).min >>3) == (getWorkingInterval((idxLevel+1) , idProcess).max>>3)){
(getWorkingInterval((idxLevel+1) ,endProcThatSend).min >>3) <= (getWorkingInterval((idxLevel+1) , idProcess).max>>3)){
++endProcThatSend;
}
if(firstProcThatSend != endProcThatSend){
hasToReceive = true;
for(int idxProc = firstProcThatSend ; idxProc < endProcThatSend ; ++idxProc ){
MPI_Irecv(&recvBuffer.data()[idxProc * recvBufferOffset], recvBufferOffset, MPI_PACKED,
idxProc, FMpi::TagFmmM2M, comm.getComm(), &requests[iterRequests++]);
......@@ -369,6 +370,7 @@ private:
}
}
}
FLOG(prepareCounter.tac());
FTRACE( regionTrace.end() );
......@@ -395,7 +397,7 @@ private:
if( hasToReceive ){
CellClass* currentChild[8];
memcpy(currentChild, iterArray[numberOfCells - 1].getCurrentChild(), 8 * sizeof(CellClass*));
// retreive data and merge my child and the child from others
for(int idxProc = firstProcThatSend ; idxProc < endProcThatSend ; ++idxProc){
recvBuffer.seek(idxProc * recvBufferOffset);
......@@ -407,12 +409,12 @@ private:
state >>= 1;
++position;
}
FAssertLF(!currentChild[position], "Already has a cell here");
FAssertLF(!currentChild[position], "Already has a cell here");
recvBufferCells[position].deserializeUp(recvBuffer);
currentChild[position] = (CellClass*) &recvBufferCells[position];
state >>= 1;
++position;
}
......@@ -423,7 +425,6 @@ private:
(*kernels[0]).M2M( iterArray[numberOfCells - 1].getCurrentCell() , currentChild, idxLevel);
FLOG(computationCounter.tac());
firstProcThatSend = endProcThatSend - 1;
}
}
......@@ -815,7 +816,7 @@ private:
const int heightMinusOne = OctreeHeight - 1;
FMpiBufferWriter sendBuffer(comm.getComm());
FMpiBufferWriter sendBuffer(comm.getComm(),MaxSizePerCell);
FMpiBufferReader recvBuffer(comm.getComm(),MaxSizePerCell);
// for each levels exepted leaf level
......
......@@ -16,7 +16,7 @@
#ifndef FCHEBCELL_HPP
#define FCHEBCELL_HPP
#include "../../Components/FBasicCell.hpp"
#include "../../Extensions/FExtendMortonIndex.hpp"
#include "../../Extensions/FExtendCoordinate.hpp"
......@@ -24,76 +24,118 @@
#include "../../Extensions/FExtendCellType.hpp"
/**
* @author Matthias Messner (matthias.messner@inria.fr)
* @class FChebCell
* Please read the license
*
* This class defines a cell used in the Chebyshev based FMM.
* @param NVALS is the number of right hand side.
*/
* @author Matthias Messner (matthias.messner@inria.fr)
* @class FChebCell
* Please read the license
*
* This class defines a cell used in the Chebyshev based FMM.
* @param NVALS is the number of right hand side.
*/
template <int ORDER, int NVALS = 1>
class FChebCell : public FExtendMortonIndex, public FExtendCoordinate
class FChebCell : public FBasicCell
{
static const int VectorSize = TensorTraits<ORDER>::nnodes * 2;
static const int VectorSize = TensorTraits<ORDER>::nnodes * 2;
FReal multipole_exp[NVALS * VectorSize]; //< Multipole expansion
FReal local_exp[NVALS * VectorSize]; //< Local expansion
FReal multipole_exp[NVALS * VectorSize]; //< Multipole expansion
FReal local_exp[NVALS * VectorSize]; //< Local expansion
public:
FChebCell(){
memset(multipole_exp, 0, sizeof(FReal) * NVALS * VectorSize);
memset(local_exp, 0, sizeof(FReal) * NVALS * VectorSize);
}
FChebCell(){
memset(multipole_exp, 0, sizeof(FReal) * NVALS * VectorSize);
memset(local_exp, 0, sizeof(FReal) * NVALS * VectorSize);
}
~FChebCell() {}
~FChebCell() {}
/** Get Multipole */
const FReal* getMultipole(const int inRhs) const
{ return this->multipole_exp + inRhs*VectorSize;
}
/** Get Local */
const FReal* getLocal(const int inRhs) const{
return this->local_exp + inRhs*VectorSize;
}
/** Get Multipole */
const FReal* getMultipole(const int inRhs) const
{ return this->multipole_exp + inRhs*VectorSize;
}
/** Get Local */
const FReal* getLocal(const int inRhs) const{
return this->local_exp + inRhs*VectorSize;
}
/** Get Multipole */
FReal* getMultipole(const int inRhs){
return this->multipole_exp + inRhs*VectorSize;
}
/** Get Local */
FReal* getLocal(const int inRhs){
return this->local_exp + inRhs*VectorSize;
}
/** Get Multipole */
FReal* getMultipole(const int inRhs){
return this->multipole_exp + inRhs*VectorSize;
}
/** Get Local */
FReal* getLocal(const int inRhs){
return this->local_exp + inRhs*VectorSize;
}
/** To get the leading dim of a vec */
int getVectorSize() const{
return VectorSize;
}
/** To get the leading dim of a vec */
int getVectorSize() const{
return VectorSize;
}
/** Make it like the begining */
void resetToInitialState(){
memset(multipole_exp, 0, sizeof(FReal) * NVALS * VectorSize);
memset(local_exp, 0, sizeof(FReal) * NVALS * VectorSize);
}
///////////////////////////////////////////////////////
// to extend FAbstractSendable
///////////////////////////////////////////////////////
template <class BufferWriterClass>
void serializeUp(BufferWriterClass& buffer) const{
buffer.write(multipole_exp, VectorSize*NVALS);
}
template <class BufferReaderClass>
void deserializeUp(BufferReaderClass& buffer){
buffer.fillArray(multipole_exp, VectorSize*NVALS);
}
template <class BufferWriterClass>
void serializeDown(BufferWriterClass& buffer) const{
buffer.write(local_exp, VectorSize*NVALS);
}
template <class BufferReaderClass>
void deserializeDown(BufferReaderClass& buffer){
buffer.fillArray(local_exp, VectorSize*NVALS);
}
///////////////////////////////////////////////////////
// to extend Serializable
///////////////////////////////////////////////////////
template <class BufferWriterClass>
void save(BufferWriterClass& buffer) const{
FBasicCell::save(buffer);
buffer.write(multipole_exp, VectorSize);
buffer.write(local_exp, VectorSize);
}
template <class BufferReaderClass>
void restore(BufferReaderClass& buffer){
FBasicCell::restore(buffer);
buffer.fillArray(multipole_exp, VectorSize);
buffer.fillArray(local_exp, VectorSize);
}
static int GetSize(){
return sizeof(FReal)*VectorSize*2*NVALS;
}
/** Make it like the begining */
void resetToInitialState(){
memset(multipole_exp, 0, sizeof(FReal) * NVALS * VectorSize);
memset(local_exp, 0, sizeof(FReal) * NVALS * VectorSize);
}
};
template <int ORDER, int NVALS = 1>
class FTypedChebCell : public FChebCell<ORDER,NVALS>, public FExtendCellType {
public:
template <class BufferWriterClass>
void save(BufferWriterClass& buffer) const{
FChebCell<ORDER,NVALS>::save(buffer);
FExtendCellType::save(buffer);
}
template <class BufferReaderClass>
void restore(BufferReaderClass& buffer){
FChebCell<ORDER,NVALS>::restore(buffer);
FExtendCellType::restore(buffer);
}
void resetToInitialState(){
FChebCell<ORDER,NVALS>::resetToInitialState();
FExtendCellType::resetToInitialState();
}
template <class BufferWriterClass>
void save(BufferWriterClass& buffer) const{
FChebCell<ORDER,NVALS>::save(buffer);
FExtendCellType::save(buffer);
}
template <class BufferReaderClass>
void restore(BufferReaderClass& buffer){
FChebCell<ORDER,NVALS>::restore(buffer);
FExtendCellType::restore(buffer);
}
void resetToInitialState(){
FChebCell<ORDER,NVALS>::resetToInitialState();
FExtendCellType::resetToInitialState();
}
};
#endif //FCHEBCELL_HPP
......
......@@ -92,6 +92,12 @@ public:
return local_exp;
}
const int getArraySize() const
{
return MultipoleSize;
}
/** Make it like the begining */
void resetToInitialState(){
for(int idx = 0 ; idx < MultipoleSize ; ++idx){
......@@ -138,6 +144,9 @@ public:
buffer.fillArray(multipole_exp, MultipoleSize);
buffer.fillArray(local_exp, LocalSize);
}
static int GetSize(){
return ((int) sizeof(FComplexe)) * (MultipoleSize + LocalSize);
}
};
template <int P>
......
......@@ -151,6 +151,10 @@ public:
buffer.fillArray(multipole_exp, PoleSize);
buffer.fillArray(local_exp, LocalSize);
}
static int GetSize(){
return (int) sizeof(FComplexe) * (PoleSize+LocalSize);
}
};
int FSphericalCell::DevP(-1);
......
......@@ -21,7 +21,7 @@
#include "../../Extensions/FExtendCoordinate.hpp"
#include "./FUnifTensor.hpp"
#include "../../Components/FBasicCell.hpp"
#include "../../Extensions/FExtendCellType.hpp"
#include "../../Utils/FComplexe.hpp"
......@@ -43,7 +43,7 @@
* @param NVALS is the number of right hand side.
*/
template <int ORDER, int NVALS = 1>
class FUnifCell : public FExtendMortonIndex, public FExtendCoordinate
class FUnifCell : public FBasicCell
{
static const int VectorSize = TensorTraits<ORDER>::nnodes;
static const int TransformedVectorSize = (2*ORDER-1)*(2*ORDER-1)*(2*ORDER-1);
......@@ -118,6 +118,59 @@ public:
return this->transformed_local_exp + inRhs*TransformedVectorSize;
}
///////////////////////////////////////////////////////
// to extend FAbstractSendable
///////////////////////////////////////////////////////
template <class BufferWriterClass>
void serializeUp(BufferWriterClass& buffer) const{
buffer.write(multipole_exp, VectorSize * NVALS);
buffer.write(transformed_multipole_exp, VectorSize * NVALS);
}
template <class BufferReaderClass>
void deserializeUp(BufferReaderClass& buffer){
buffer.fillArray(multipole_exp, VectorSize*NVALS);
buffer.fillArray(transformed_multipole_exp, VectorSize*NVALS);
}
template <class BufferWriterClass>
void serializeDown(BufferWriterClass& buffer) const{
buffer.write(local_exp, VectorSize*NVALS);
buffer.write(transformed_local_exp, VectorSize * NVALS);
}
template <class BufferReaderClass>
void deserializeDown(BufferReaderClass& buffer){
buffer.fillArray(local_exp, VectorSize*NVALS);
buffer.fillArray(transformed_local_exp, VectorSize*NVALS);
}
///////////////////////////////////////////////////////
// to extend Serializable
///////////////////////////////////////////////////////
template <class BufferWriterClass>
void save(BufferWriterClass& buffer) const{
FBasicCell::save(buffer);
buffer.write(multipole_exp, VectorSize*NVALS);
buffer.write(transformed_multipole_exp, VectorSize*NVALS);
buffer.write(local_exp, VectorSize*NVALS);
buffer.write(transformed_local_exp, VectorSize*NVALS);
}
template <class BufferReaderClass>
void restore(BufferReaderClass& buffer){
FBasicCell::restore(buffer);
buffer.fillArray(multipole_exp, VectorSize*NVALS);
buffer.fillArray(transformed_multipole_exp, VectorSize*NVALS);
buffer.fillArray(local_exp, VectorSize*NVALS);
buffer.fillArray(transformed_local_exp, VectorSize*NVALS);
}
static int GetSize(){
return 2 * (int) sizeof(FReal)*VectorSize*NVALS + 2*NVALS*TransformedVectorSize*(int) sizeof(FComplexe);
}
};
template <int ORDER, int NVALS = 1>
......
This diff is collapsed.
// ===================================================================================
// Ce LOGICIEL "ScalFmm" est couvert par le copyright Inria 20xx-2012.
// Inria détient tous les droits de propriété sur le LOGICIEL, et souhaite que
// la communauté scientifique l'utilise afin de le tester et de l'évaluer.
// Inria donne gracieusement le droit d'utiliser ce LOGICIEL. Toute utilisation
// dans un but lucratif ou à des fins commerciales est interdite sauf autorisation
// expresse et préalable d'Inria.
// Toute utilisation hors des limites précisées ci-dessus et réalisée sans l'accord
// expresse préalable d'Inria constituerait donc le délit de contrefaçon.
// Le LOGICIEL étant un produit en cours de développement, Inria ne saurait assurer
// aucune responsabilité et notamment en aucune manière et en aucun cas, être tenu
// de répondre d'éventuels dommages directs ou indirects subits par l'utilisateur.
// Tout utilisateur du LOGICIEL s'engage à communiquer à Inria ses remarques
// relatives à l'usage du LOGICIEL
// ===================================================================================
// ==== CMAKE =====
// @FUSE_BLAS
// ================
#include <iostream>
#include <cstdio>
#include <cstdlib>
#include "../../Src/Kernels/Uniform/FUnifCell.hpp"
#include "../../Src/Kernels/Interpolation/FInterpMatrixKernel.hpp"
#include "../../Src/Kernels/Uniform/FUnifKernel.hpp"
#include "../../Src/Components/FSimpleLeaf.hpp"
#include "../../Src/Kernels/P2P/FP2PParticleContainerIndexed.hpp"
#include "../../Src/Utils/FParameters.hpp"
#include "../../Src/Utils/FMemUtils.hpp"
#include "../../Src/Containers/FOctree.hpp"
#include "../../Src/Containers/FVector.hpp"
#include "../../Src/Files/FFmaScanfLoader.hpp"
#include "../../Src/Files/FFmaBinLoader.hpp"
#include "../../Src/Files/FMpiFmaLoader.hpp"
#include "../../Src/Files/FMpiTreeBuilder.hpp"
#include "../../Src/Core/FFmmAlgorithm.hpp"
#include "../../Src/Core/FFmmAlgorithmThread.hpp"
#include "../../Src/Core/FFmmAlgorithmThreadProc.hpp"
/**
* This program runs the FMM Algorithm Distributed with the Uniform kernel
*/
// Simply create particles and try the kernel
int main(int argc, char* argv[])
{
const unsigned int ORDER = 7;
const FReal epsilon = FReal(1e-7);
typedef FP2PParticleContainerIndexed ContainerClass;
typedef FSimpleLeaf< ContainerClass > LeafClass;
typedef FInterpMatrixKernelR MatrixKernelClass;
typedef FUnifCell<ORDER> CellClass;
typedef FOctree<CellClass,ContainerClass,LeafClass> OctreeClass;
typedef FUnifKernel<CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass;
typedef FFmmAlgorithmThreadProc<OctreeClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass;
FMpi app(argc,argv);
const char* const filename = FParameters::getStr(argc,argv,"-f", "../Data/test20k.fma");
const unsigned int TreeHeight = FParameters::getValue(argc, argv, "-h", 5);
const unsigned int SubTreeHeight = FParameters::getValue(argc, argv, "-sh", 2);
const unsigned int NbThreads = FParameters::getValue(argc, argv, "-t", 1);
std::cout << ">> This executable has to be used to test Proc Uniform Algorithm. \n";
#ifdef _OPENMP
omp_set_num_threads(NbThreads);
std::cout << "\n>> Using " << omp_get_max_threads() << " threads.\n" << std::endl;
#else
std::cout << "\n>> Sequential version.\n" << std::endl;
#endif
std::cout << "Opening : " <<filename << "\n" << std::endl;
// init timer
FTic time;
// init particles position and physical value
struct TestParticle{
FPoint position;
FReal physicalValue;
const FPoint& getPosition(){
return position;
}
};
// open particle file
FMpiFmaLoader loader(filename,app.global());
if(!loader.isOpen()) throw std::runtime_error("Particle file couldn't be opened!");
OctreeClass tree(TreeHeight, SubTreeHeight,loader.getBoxWidth(),loader.getCenterOfBox());
time.tic();
TestParticle* particles = new TestParticle[loader.getNumberOfParticles()];
memset(particles,0,(unsigned int) (sizeof(TestParticle)* loader.getNumberOfParticles()));
for(int idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
loader.fillParticle(&particles[idxPart].position,&particles[idxPart].physicalValue);
}
FVector<TestParticle> finalParticles;
FMpiTreeBuilder< TestParticle >::ArrayToTree(app.global(), particles, loader.getNumberOfParticles(),
tree.getBoxCenter(),
tree.getBoxWidth(),
tree.getHeight(), &finalParticles);
{ // -----------------------------------------------------
std::cout << "Creating & Inserting " << loader.getNumberOfParticles()
<< " particles ..." << std::endl;
std::cout << "\tHeight : " << TreeHeight << " \t sub-height : " << SubTreeHeight << std::endl;
time.tic();
for(int idxPart = 0 ; idxPart < finalParticles.getSize() ; ++idxPart){
// put in tree
tree.insert(finalParticles[idxPart].position, idxPart, finalParticles[idxPart].physicalValue);
}
time.tac();
std::cout << "Done " << "(@Creating and Inserting Particles = "
<< time.elapsed() << "s)." << std::endl;
} // -----------------------------------------------------
{ // -----------------------------------------------------
std::cout << "\nChebyshev FMM (ORDER="<< ORDER << ",EPS="<< epsilon <<") ... " << std::endl;
time.tic();
KernelClass kernels(TreeHeight, loader.getCenterOfBox(), loader.getBoxWidth());
FmmClass algorithm(app.global(),&tree, &kernels);
algorithm.execute();
time.tac();
std::cout << "Done " << "(@Algorithm = " << time.elapsed() << "s)." << std::endl;
} // -----------------------------------------------------
{ // -----------------------------------------------------
std::cout << "\nError computation ... " << std::endl;
FReal potential;
FReal fx, fy, fz;
{ // Check that each particle has been summed with all other
tree.forEachLeaf([&](LeafClass* leaf){
const FReal*const potentials = leaf->getTargets()->getPotentials();
const FReal*const forcesX = leaf->getTargets()->getForcesX();
const FReal*const forcesY = leaf->getTargets()->getForcesY();
const FReal*const forcesZ = leaf->getTargets()->getForcesZ();
const int nbParticlesInLeaf = leaf->getTargets()->getNbParticles();
for(int idxPart = 0 ; idxPart < nbParticlesInLeaf ; ++idxPart){
potential += potentials[idxPart];
fx += forcesX[idxPart];
fy += forcesY[idxPart];
fz += forcesZ[idxPart];
}
});
}
// Print for information
std::cout << "Potential " << potential << std::endl;
std::cout << "Fx " << fx << std::endl;
std::cout << "Fy " << fy << std::endl;
std::cout << "Fz " << fz << std::endl;
} // end Chebyshev kernel
return 0;
}
......@@ -351,7 +351,7 @@ int main(int argc, char ** argv){
TestParticle* particles = new TestParticle[loader.getNumberOfParticles()];
memset(particles, 0, sizeof(TestParticle) * loader.getNumberOfParticles());
FReal physicalValue; //unused
FReal physicalValue;
for(int idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
loader.fillParticle(&particles[idxPart].position,&physicalValue);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment