Commit e664b855 authored by COULAUD Olivier

add openmp tests

parent 3a263271
......@@ -157,7 +157,9 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules/morse/
set(SSE_FLAGS "-axSSE4.2 -march=native")
endif(APPLE)
#-Wshadow -Wpointer-arith -Wcast-qual -Wconversion -Wall -Wnosign-conversion ")
else()
elseif(CMAKE_CXX_COMPILER_ID STREQUAL "XL")
set(SCALFMM_CXX_FLAGS "${SCALFMM_CXX_FLAGS} -mcpu=power8 -mtune=power8")
else() #if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
# NOT INTEL
if(NOT SCALFMM_USE_MPI)
include(CheckCCompilerFlag)
......
......@@ -44,6 +44,7 @@
#ifdef _OPENMP
#include "Core/FFmmAlgorithmThread.hpp"
#include "Core/FFmmAlgorithmSectionTask.hpp"
#else
#include "Core/FFmmAlgorithm.hpp"
#endif
......@@ -66,206 +67,207 @@
// Simply create particles and try the kernels
int main(int argc, char* argv[])
{
FHelpDescribeAndExit(argc, argv,
"Driver for Lagrange interpolation kernel (1/r kernel).",
FParameterDefinitions::InputFile, FParameterDefinitions::OctreeHeight,
FParameterDefinitions::OctreeSubHeight, FParameterDefinitions::InputFile,FParameterDefinitions::OutputFile,
FParameterDefinitions::NbThreads);
FHelpDescribeAndExit(argc, argv,
"Driver for Lagrange interpolation kernel (1/r kernel).",
FParameterDefinitions::InputFile, FParameterDefinitions::OctreeHeight,
FParameterDefinitions::OctreeSubHeight, FParameterDefinitions::InputFile,FParameterDefinitions::OutputFile,
FParameterDefinitions::NbThreads);
const std::string defaultFile(SCALFMMDataPath+"unitCubeXYZQ100.bfma" );
const std::string filename = FParameters::getStr(argc,argv,FParameterDefinitions::InputFile.options, defaultFile.c_str());
const unsigned int TreeHeight = FParameters::getValue(argc, argv, FParameterDefinitions::OctreeHeight.options, 5);
const unsigned int SubTreeHeight = FParameters::getValue(argc, argv, FParameterDefinitions::OctreeSubHeight.options, 2);
const unsigned int NbThreads = FParameters::getValue(argc, argv, FParameterDefinitions::NbThreads.options, 1);
const std::string defaultFile(SCALFMMDataPath+"unitCubeXYZQ100.bfma" );
const std::string filename = FParameters::getStr(argc,argv,FParameterDefinitions::InputFile.options, defaultFile.c_str());
const unsigned int TreeHeight = FParameters::getValue(argc, argv, FParameterDefinitions::OctreeHeight.options, 5);
const unsigned int SubTreeHeight = FParameters::getValue(argc, argv, FParameterDefinitions::OctreeSubHeight.options, 2);
const unsigned int NbThreads = FParameters::getValue(argc, argv, FParameterDefinitions::NbThreads.options, 1);
#ifdef _OPENMP
omp_set_num_threads(NbThreads);
std::cout << "\n>> Using " << omp_get_max_threads() << " threads.\n" << std::endl;
omp_set_num_threads(NbThreads);
std::cout << "\n>> Using " << omp_get_max_threads() << " threads.\n" << std::endl;
#else
std::cout << "\n>> Sequential version.\n" << std::endl;
std::cout << "\n>> Sequential version.\n" << std::endl;
#endif
//
std::cout << "Parameters "<< std::endl
<< " Octree Depth "<< TreeHeight <<std::endl
<< " SubOctree depth " << SubTreeHeight <<std::endl
<< " Input file name: " <<filename <<std::endl
<< " Thread number: " << NbThreads <<std::endl
<<std::endl;
//
// init timer
FTic time;
// open particle file
////////////////////////////////////////////////////////////////////
//
typedef double FReal;
FFmaGenericLoader<FReal> loader(filename);
//
////////////////////////////////////////////////////////////////////
// begin Lagrange kernel
// accuracy
const unsigned int ORDER = 7;
// typedefs
typedef FP2PParticleContainerIndexed<FReal> ContainerClass;
typedef FSimpleLeaf<FReal, ContainerClass > LeafClass;
typedef FUnifCell<FReal,ORDER> CellClass;
typedef FOctree<FReal, CellClass,ContainerClass,LeafClass> OctreeClass;
//
typedef FInterpMatrixKernelR<FReal> MatrixKernelClass;
const MatrixKernelClass MatrixKernel;
typedef FUnifKernel<FReal,CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass;
//
#ifdef _OPENMP
// typedef FFmmAlgorithmThread<OctreeClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass;
typedef FFmmAlgorithmSectionTask<OctreeClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass;
#else
typedef FFmmAlgorithm<OctreeClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass;
#endif
// init oct-tree
OctreeClass tree(TreeHeight, SubTreeHeight, loader.getBoxWidth(), loader.getCenterOfBox());
{ // -----------------------------------------------------
std::cout << "Creating & Inserting " << loader.getNumberOfParticles()
<< " particles ..." << std::endl;
std::cout << "\tHeight : " << TreeHeight << " \t sub-height : " << SubTreeHeight << std::endl;
time.tic();
//
std::cout << "Parameters "<< std::endl
<< " Octree Depth "<< TreeHeight <<std::endl
<< " SubOctree depth " << SubTreeHeight <<std::endl
<< " Input file name: " <<filename <<std::endl
<< " Thread number: " << NbThreads <<std::endl
<<std::endl;
FPoint<FReal> position;
FReal physicalValue = 0.0;
//
// init timer
FTic time;
for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
//
// Read particle per particle from file
loader.fillParticle(&position,&physicalValue);
//
// put particle in octree
tree.insert(position, idxPart, physicalValue);
}
time.tac();
std::cout << "Done " << "(@Creating and Inserting Particles = "
<< time.elapsed() << " s) ." << std::endl;
} // -----------------------------------------------------
// open particle file
////////////////////////////////////////////////////////////////////
{ // -----------------------------------------------------
std::cout << "\nLagrange FMM (ORDER="<< ORDER << ") ... " << std::endl;
time.tic();
//
typedef double FReal;
FFmaGenericLoader<FReal> loader(filename);
std::unique_ptr<KernelClass> kernels(new KernelClass(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(),&MatrixKernel));
//
////////////////////////////////////////////////////////////////////
// begin Lagrange kernel
// accuracy
const unsigned int ORDER = 7;
// typedefs
typedef FP2PParticleContainerIndexed<FReal> ContainerClass;
typedef FSimpleLeaf<FReal, ContainerClass > LeafClass;
typedef FUnifCell<FReal,ORDER> CellClass;
typedef FOctree<FReal, CellClass,ContainerClass,LeafClass> OctreeClass;
FmmClass algo(&tree, kernels.get());
//
typedef FInterpMatrixKernelR<FReal> MatrixKernelClass;
const MatrixKernelClass MatrixKernel;
typedef FUnifKernel<FReal,CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass;
algo.execute(); // Here the call of the FMM algorithm
//
#ifdef _OPENMP
typedef FFmmAlgorithmThread<OctreeClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass;
#else
typedef FFmmAlgorithm<OctreeClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass;
#endif
// init oct-tree
OctreeClass tree(TreeHeight, SubTreeHeight, loader.getBoxWidth(), loader.getCenterOfBox());
{ // -----------------------------------------------------
std::cout << "Creating & Inserting " << loader.getNumberOfParticles()
<< " particles ..." << std::endl;
std::cout << "\tHeight : " << TreeHeight << " \t sub-height : " << SubTreeHeight << std::endl;
time.tic();
//
FPoint<FReal> position;
FReal physicalValue = 0.0;
//
for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
//
// Read particle per particle from file
loader.fillParticle(&position,&physicalValue);
//
// put particle in octree
tree.insert(position, idxPart, physicalValue);
}
time.tac();
std::cout << "Done " << "(@Creating and Inserting Particles = "
<< time.elapsed() << " s) ." << std::endl;
} // -----------------------------------------------------
{ // -----------------------------------------------------
std::cout << "\nLagrange FMM (ORDER="<< ORDER << ") ... " << std::endl;
time.tic();
//
std::unique_ptr<KernelClass> kernels(new KernelClass(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(),&MatrixKernel));
//
FmmClass algo(&tree, kernels.get());
//
algo.execute(); // Here the call of the FMM algorithm
//
time.tac();
std::cout << "Timers Far Field \n"
<< "P2M " << algo.getTime(FAlgorithmTimers::P2MTimer) << " seconds\n"
<< "M2M " << algo.getTime(FAlgorithmTimers::M2MTimer) << " seconds\n"
<< "M2L " << algo.getTime(FAlgorithmTimers::M2LTimer) << " seconds\n"
<< "L2L " << algo.getTime(FAlgorithmTimers::L2LTimer) << " seconds\n"
<< "P2P and L2P " << algo.getTime(FAlgorithmTimers::NearTimer) << " seconds\n"
<< std::endl;
std::cout << "Done " << "(@Algorithm = " << time.elapsed() << " s) ." << std::endl;
}
// -----------------------------------------------------
time.tac();
std::cout << "Timers Far Field \n"
<< "P2M " << algo.getTime(FAlgorithmTimers::P2MTimer) << " seconds\n"
<< "M2M " << algo.getTime(FAlgorithmTimers::M2MTimer) << " seconds\n"
<< "M2L " << algo.getTime(FAlgorithmTimers::M2LTimer) << " seconds\n"
<< "L2L " << algo.getTime(FAlgorithmTimers::L2LTimer) << " seconds\n"
<< "P2P and L2P " << algo.getTime(FAlgorithmTimers::NearTimer) << " seconds\n"
<< std::endl;
std::cout << "Done " << "(@Algorithm = " << time.elapsed() << " s) ." << std::endl;
}
// -----------------------------------------------------
//
// Some output
//
//
{ // -----------------------------------------------------
FSize N1=0, N2= loader.getNumberOfParticles()/2, N3= loader.getNumberOfParticles() -1; ;
FReal energy =0.0 ;
//
// Some output
// Loop over all leaves
//
std::cout <<std::endl<<" &&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&& "<<std::endl;
std::cout << std::scientific;
std::cout.precision(10) ;
tree.forEachLeaf([&](LeafClass* leaf){
const FReal*const posX = leaf->getTargets()->getPositions()[0];
const FReal*const posY = leaf->getTargets()->getPositions()[1];
const FReal*const posZ = leaf->getTargets()->getPositions()[2];
const FReal*const potentials = leaf->getTargets()->getPotentials();
const FReal*const forcesX = leaf->getTargets()->getForcesX();
const FReal*const forcesY = leaf->getTargets()->getForcesY();
const FReal*const forcesZ = leaf->getTargets()->getForcesZ();
const FSize nbParticlesInLeaf = leaf->getTargets()->getNbParticles();
const FReal*const physicalValues = leaf->getTargets()->getPhysicalValues();
const FVector<FSize>& indexes = leaf->getTargets()->getIndexes();
for(FSize idxPart = 0 ; idxPart < nbParticlesInLeaf ; ++idxPart){
const FSize indexPartOrig = indexes[idxPart];
if ((indexPartOrig == N1) || (indexPartOrig == N2) || (indexPartOrig == N3) ) {
std::cout << "Index "<< indexPartOrig <<" potential " << potentials[idxPart]
<< " Pos "<<posX[idxPart]<<" "<<posY[idxPart]<<" "<<posZ[idxPart]
<< " Forces: " << forcesX[idxPart] << " " << forcesY[idxPart] << " "<< forcesZ[idxPart] <<std::endl;
}
energy += potentials[idxPart]*physicalValues[idxPart] ;
}
});
std::cout <<std::endl<<"Energy: "<< energy<<std::endl;
std::cout <<std::endl<<" &&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&& "<<std::endl<<std::endl;
}
// -----------------------------------------------------
if(FParameters::existParameter(argc, argv, FParameterDefinitions::OutputFile.options)){
std::string name(FParameters::getStr(argc,argv,FParameterDefinitions::OutputFile.options, "output.fma"));
FFmaGenericWriter<FReal> writer(name) ;
//
{ // -----------------------------------------------------
FSize N1=0, N2= loader.getNumberOfParticles()/2, N3= loader.getNumberOfParticles() -1; ;
FReal energy =0.0 ;
//
// Loop over all leaves
//
std::cout <<std::endl<<" &&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&& "<<std::endl;
std::cout << std::scientific;
std::cout.precision(10) ;
tree.forEachLeaf([&](LeafClass* leaf){
const FReal*const posX = leaf->getTargets()->getPositions()[0];
const FReal*const posY = leaf->getTargets()->getPositions()[1];
const FReal*const posZ = leaf->getTargets()->getPositions()[2];
const FReal*const potentials = leaf->getTargets()->getPotentials();
const FReal*const forcesX = leaf->getTargets()->getForcesX();
const FReal*const forcesY = leaf->getTargets()->getForcesY();
const FReal*const forcesZ = leaf->getTargets()->getForcesZ();
const FSize nbParticlesInLeaf = leaf->getTargets()->getNbParticles();
const FReal*const physicalValues = leaf->getTargets()->getPhysicalValues();
const FVector<FSize>& indexes = leaf->getTargets()->getIndexes();
for(FSize idxPart = 0 ; idxPart < nbParticlesInLeaf ; ++idxPart){
const FSize indexPartOrig = indexes[idxPart];
if ((indexPartOrig == N1) || (indexPartOrig == N2) || (indexPartOrig == N3) ) {
std::cout << "Index "<< indexPartOrig <<" potential " << potentials[idxPart]
<< " Pos "<<posX[idxPart]<<" "<<posY[idxPart]<<" "<<posZ[idxPart]
<< " Forces: " << forcesX[idxPart] << " " << forcesY[idxPart] << " "<< forcesZ[idxPart] <<std::endl;
}
energy += potentials[idxPart]*physicalValues[idxPart] ;
}
});
std::cout <<std::endl<<"Energy: "<< energy<<std::endl;
std::cout <<std::endl<<" &&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&& "<<std::endl<<std::endl;
FSize NbPoints = loader.getNumberOfParticles();
FReal * particles ;
particles = new FReal[8*NbPoints] ;
memset(particles,0,8*NbPoints*sizeof(FReal));
FSize j = 0 ;
tree.forEachLeaf([&](LeafClass* leaf){
//
// Input
const FReal*const posX = leaf->getTargets()->getPositions()[0];
const FReal*const posY = leaf->getTargets()->getPositions()[1];
const FReal*const posZ = leaf->getTargets()->getPositions()[2];
const FReal*const physicalValues = leaf->getTargets()->getPhysicalValues();
const FVector<FSize>& indexes = leaf->getTargets()->getIndexes();
//
// Computed data
const FReal*const potentials = leaf->getTargets()->getPotentials();
const FReal*const forcesX = leaf->getTargets()->getForcesX();
const FReal*const forcesY = leaf->getTargets()->getForcesY();
const FReal*const forcesZ = leaf->getTargets()->getForcesZ();
//
const FSize nbParticlesInLeaf = leaf->getTargets()->getNbParticles();
for(FSize idxPart = 0 ; idxPart < nbParticlesInLeaf ; ++idxPart){
j = 8*indexes[idxPart];
particles[j] = posX[idxPart] ;
particles[j+1] = posY[idxPart] ;
particles[j+2] = posZ[idxPart] ;
particles[j+3] = physicalValues[idxPart] ;
particles[j+4] = potentials[idxPart] ;
particles[j+5] = forcesX[idxPart] ;
particles[j+6] = forcesY[idxPart] ;
particles[j+7] = forcesZ[idxPart] ;
}
});
writer.writeHeader( loader.getCenterOfBox(), loader.getBoxWidth() , NbPoints, sizeof(FReal), 8) ;
writer.writeArrayOfReal(particles, 8 , NbPoints);
delete[] particles;
}
// -----------------------------------------------------
if(FParameters::existParameter(argc, argv, FParameterDefinitions::OutputFile.options)){
std::string name(FParameters::getStr(argc,argv,FParameterDefinitions::OutputFile.options, "output.fma"));
FFmaGenericWriter<FReal> writer(name) ;
//
FSize NbPoints = loader.getNumberOfParticles();
FReal * particles ;
particles = new FReal[8*NbPoints] ;
memset(particles,0,8*NbPoints*sizeof(FReal));
FSize j = 0 ;
tree.forEachLeaf([&](LeafClass* leaf){
//
// Input
const FReal*const posX = leaf->getTargets()->getPositions()[0];
const FReal*const posY = leaf->getTargets()->getPositions()[1];
const FReal*const posZ = leaf->getTargets()->getPositions()[2];
const FReal*const physicalValues = leaf->getTargets()->getPhysicalValues();
const FVector<FSize>& indexes = leaf->getTargets()->getIndexes();
//
// Computed data
const FReal*const potentials = leaf->getTargets()->getPotentials();
const FReal*const forcesX = leaf->getTargets()->getForcesX();
const FReal*const forcesY = leaf->getTargets()->getForcesY();
const FReal*const forcesZ = leaf->getTargets()->getForcesZ();
//
const FSize nbParticlesInLeaf = leaf->getTargets()->getNbParticles();
for(FSize idxPart = 0 ; idxPart < nbParticlesInLeaf ; ++idxPart){
j = 8*indexes[idxPart];
particles[j] = posX[idxPart] ;
particles[j+1] = posY[idxPart] ;
particles[j+2] = posZ[idxPart] ;
particles[j+3] = physicalValues[idxPart] ;
particles[j+4] = potentials[idxPart] ;
particles[j+5] = forcesX[idxPart] ;
particles[j+6] = forcesY[idxPart] ;
particles[j+7] = forcesZ[idxPart] ;
}
});
writer.writeHeader( loader.getCenterOfBox(), loader.getBoxWidth() , NbPoints, sizeof(FReal), 8) ;
writer.writeArrayOfReal(particles, 8 , NbPoints);
delete[] particles;
//
std::string name1( "output.fma");
//
FFmaGenericWriter<FReal> writer1(name1) ;
writer1.writeDistributionOfParticlesFromOctree(&tree,NbPoints) ;
}
//
std::string name1( "output.fma");
//
FFmaGenericWriter<FReal> writer1(name1) ;
writer1.writeDistributionOfParticlesFromOctree(&tree,NbPoints) ;
}
return 0;
return 0;
}
......@@ -31,16 +31,16 @@ echo $DATE
#
# INTEL
#
module add compiler/gcc/5.1.0 compiler/intel/64/2016_beta
cd $project_dir/BuildIntel2016
COMP=INTEL
export KMP_AFFINITY=scatter
#module add compiler/gcc/5.1.0 compiler/intel/64/2016_beta
#cd $project_dir/BuildIntel2016
#COMP=INTEL
#export KMP_AFFINITY=scatter
#
# GCC
#
#module add compiler/gcc/5.1.0 intel/mkl/64/11.2/2015.3.187
#cd $project_dir/BuildGCC51
#COMP=GCC
module add compiler/gcc/5.1.0 intel/mkl/64/11.2/2015.3.187
cd $project_dir/BuildGCC51
COMP=GCC
#
# For eztrace
#
......@@ -62,7 +62,7 @@ ALGO=(basic balanced task tasknew sectiontask sectiontasknew)
#ALGO=( task )
#
pwd
#export OMP_PROC_BIND=true
export OMP_PROC_BIND=true
NUM=`git rev-list HEAD --count`
echo $DISTRIB
REP=${DATE}-${COMP}-${METH}
......
......@@ -8,22 +8,22 @@
#SBATCH --nodes=1
#SBATCH --ntasks-per-node=24
#
source $HOME/Config/bashrc.bash
#source $HOME/Config/bashrc.bash
project_dir=/projets/scalfmm/scalfmm-tests ;
project_dir=$HOME/Dev/src/ScalFMM/scalfmm ;
#
# PlaFRIM environment
#
module add compiler/gcc/5.1.0 compiler/intel/64/2016_beta; module li
module add compiler/gcc/6.1.0 compiler/intel/64/20176_beta; module li
#
EXEC="Examples/Release/ChebyshevInterpolationFMM"
FILEPERF="RES_Chebyshev-openmp"
#EXEC="Examples/Release/LagrangeInterpolationFMM"
#FILEPERF="RES_Lagrange-openmp"
FILE="unitcube_2M.bfma"
#EXEC="Examples/Release/ChebyshevInterpolationFMM"
#FILEPERF="RES_Chebyshev-openmp"
EXEC="Examples/Release/LagrangeInterpolationFMM"
FILEPERF="RES_Lagrange-openmp"
FILE="/projets/scalfmm/data/tests/unitCube20M.bfma"
#
cd $project_dir/BuildIntel2016
cd $project_dir/BuildIntel
#
#
PER_SIZE=`cat /proc/cpuinfo |grep processor |wc -l`
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment