Commit c05b8cfb authored by BRAMAS Berenger's avatar BRAMAS Berenger
parents e08b3205 38a55732
......@@ -47,6 +47,19 @@
## Intel10_64lp_seq (intel mkl v10 64 bit,sequential code, lp64 model),
## Intel( older versions of mkl 32 and 64 bit), ACML,ACML_MP,ACML_GPU,Apple, NAS, Generic
# C/CXX should be enabled to use Intel mkl
###
# We handle different modes to find the dependency
#
# - Detection if already installed on the system
# - BLAS libraries can be detected in different ways
# Here is the order of precedence:
# 1) we look in cmake variable BLAS_LIBDIR or BLAS_DIR (we guess the libdirs) if defined
# 2) we look in environment variable BLAS_LIBDIR or BLAS_DIR (we guess the libdirs) if defined
# 3) we look in common environment variables depending on the system (INCLUDE, C_INCLUDE_PATH, CPATH - LIB, DYLD_LIBRARY_PATH, LD_LIBRARY_PATH)
# 4) we look in common system paths depending on the system, see for example paths contained in the following cmake variables:
# - CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES, CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES
# - CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES, CMAKE_C_IMPLICIT_LINK_DIRECTORIES
#
#=============================================================================
# Copyright 2007-2009 Kitware, Inc.
......
......@@ -48,6 +48,19 @@
# look for a standalone cblas, please add the following to your
# CMakeLists.txt before calling find_package(CBLAS):
# set(CBLAS_STANDALONE TRUE)
###
# We handle different modes to find the dependency
#
# - Detection if already installed on the system
# - CBLAS libraries can be detected in different ways
# Here is the order of precedence:
# 1) we look in cmake variable CBLAS_LIBDIR or CBLAS_DIR (we guess the libdirs) if defined
# 2) we look in environment variable CBLAS_LIBDIR or CBLAS_DIR (we guess the libdirs) if defined
# 3) we look in common environment variables depending on the system (INCLUDE, C_INCLUDE_PATH, CPATH - LIB, DYLD_LIBRARY_PATH, LD_LIBRARY_PATH)
# 4) we look in common system paths depending on the system, see for example paths contained in the following cmake variables:
# - CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES, CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES
# - CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES, CMAKE_C_IMPLICIT_LINK_DIRECTORIES
#
#=============================================================================
# Copyright 2012-2013 Inria
......@@ -80,11 +93,7 @@ endif()
if (CBLAS_FIND_COMPONENTS)
foreach( component ${CBLAS_FIND_COMPONENTS} )
if(CBLAS_FIND_REQUIRED_${component})
if (CBLAS_FIND_REQUIRED)
find_package(${component} REQUIRED)
else()
find_package(${component})
endif()
find_package(${component} REQUIRED)
else()
find_package(${component})
endif()
......
......@@ -310,7 +310,12 @@ if( (NOT PKG_CONFIG_EXECUTABLE) OR (PKG_CONFIG_EXECUTABLE AND NOT STARPU_FOUND)
set(STARPU_${starpu_hdr}_INCLUDE_DIRS "STARPU_${starpu_hdr}_INCLUDE_DIRS-NOTFOUND")
find_path(STARPU_${starpu_hdr}_INCLUDE_DIRS
NAMES ${starpu_hdr}
HINTS ${_inc_env})
HINTS ${_inc_env}
PATH_SUFFIXES
"starpu/1.0"
"starpu/1.1"
"starpu/1.2"
"starpu/1.3")
endforeach()
endif()
endif()
......
......@@ -40,7 +40,7 @@
* Of course, this class does not deallocate the pointers given as arguments.
*/
template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass>
class FFmmAlgorithmTask : public FAbstractAlgorithm{
class FFmmAlgorithmTask : public FAbstractAlgorithm, public FAlgorithmTimers {
OctreeClass* const tree; //< The octree to work on
KernelClass** kernels; //< The kernels
......@@ -92,15 +92,30 @@ protected:
*/
void executeCore(const unsigned operationsToProceed) override {
// Runs the FMM passes selected by the bit flags in operationsToProceed
// (FFmmP2M, FFmmM2M, FFmmM2L, FFmmL2L, FFmmP2P, FFmmL2P).
//
// NOTE(review): the five untimed calls directly below invoke the same passes as
// the timed section that follows, so as written every selected pass would run
// twice. They look like the pre-change side of this diff hunk - confirm and
// remove them if so.
if(operationsToProceed & FFmmP2M) bottomPass();
if(operationsToProceed & FFmmM2M) upwardPass();
if(operationsToProceed & FFmmM2L) transferPass();
if(operationsToProceed & FFmmL2L) downardPass();
if((operationsToProceed & FFmmP2P) || (operationsToProceed & FFmmL2P)) directPass((operationsToProceed & FFmmP2P),(operationsToProceed & FFmmL2P));
// Timed section: each pass is bracketed by tic()/tac() on its own entry of
// Timers. The timer is started and stopped even when the pass is not selected.
Timers[P2MTimer].tic();
if(operationsToProceed & FFmmP2M)
bottomPass();
Timers[P2MTimer].tac();
Timers[M2MTimer].tic();
if(operationsToProceed & FFmmM2M)
upwardPass();
Timers[M2MTimer].tac();
Timers[M2LTimer].tic();
if(operationsToProceed & FFmmM2L)
transferPass();
Timers[M2LTimer].tac();
Timers[L2LTimer].tic();
if(operationsToProceed & FFmmL2L)
downardPass();
Timers[L2LTimer].tac();
// A single timer covers directPass, which is told separately whether P2P and
// L2P were requested.
Timers[NearTimer].tic();
if( (operationsToProceed & FFmmP2P) || (operationsToProceed & FFmmL2P) )
directPass((operationsToProceed & FFmmP2P),(operationsToProceed & FFmmL2P));
Timers[NearTimer].tac();
}
/////////////////////////////////////////////////////////////////////////////
......
......@@ -78,6 +78,7 @@ public:
* The constructor needs the octree and the kernels used for computation.
* \param inTree the octree to work on.
* \param inKernels the kernels to call.
* \param inStaticSchedule Whether to use static or dynamic OpenMP scheduling.
*
* \except An exception is thrown if one of the arguments is NULL.
*/
......
......@@ -60,6 +60,7 @@
// Algorithms
#include "Core/FFmmAlgorithm.hpp"
#include "Core/FFmmAlgorithmThread.hpp"
#include "Core/FFmmAlgorithmTask.hpp"
#include "BalanceTree/FFmmAlgorithmThreadBalanced.hpp"
#include "BalanceTree/FCostZones.hpp"
......@@ -88,7 +89,7 @@ protected:
template <class OctreeClass>
void loadTree(FFmaGenericLoader<FReal>& loader, OctreeClass& tree) {
std::cout << "Creating & inserting particles";
std::cout << "Creating & inserting particles" << std::flush;
time.tic();
......@@ -183,6 +184,63 @@ public: // typedefs
using FmmClass = FFmmAlgorithmThread<OctreeClass, CellClass, ContainerClass, KernelClass, LeafClass>;
protected:
int _nbThreads;
FFmaGenericLoader<FReal> _loader;
OctreeClass _tree;
FmmClass* _algo;
bool _ompStaticScheduling;
public:
/**
 * Builds the test: opens the particle file and creates the octree from the
 * box width and box centre reported by the loader.
 *
 * \param fileName            Path handed to the particle loader.
 * \param nbThreads           Thread count later passed to omp_set_num_threads().
 * \param treeHeight          Height of the octree.
 * \param subTreeHeight       Sub-tree height of the octree.
 * \param ompStaticScheduling Forwarded to the algorithm constructor in runAlgo().
 */
PerfTest(const std::string& fileName, const int nbThreads, const int treeHeight, const int subTreeHeight, bool ompStaticScheduling) :
    _nbThreads(nbThreads),
    _loader(fileName),
    // _tree reads from _loader, which is declared (and therefore built) first.
    _tree(treeHeight, subTreeHeight, _loader.getBoxWidth(), _loader.getCenterOfBox()),
    // Start from a null algorithm so the destructor's delete is well defined
    // even when runAlgo() was never called.
    _algo(nullptr),
    _ompStaticScheduling(ompStaticScheduling) {
}
/** Releases the algorithm object owned through _algo. */
~PerfTest() {
    // delete on a null pointer is a no-op, so no explicit null test is needed.
    delete _algo;
}
protected:
/** Sets the OpenMP thread count, reports it, then loads the particles into the tree. */
virtual void setup() {
    omp_set_num_threads(_nbThreads);

    // Print what OpenMP reports rather than the value that was requested.
    const int reportedThreads = omp_get_max_threads();
    std::cout << "\n>> Using " << reportedThreads << " threads.\n" << std::endl;

    loadTree(_loader, _tree);
}
/**
 * Builds the kernel and the FMM algorithm, runs it, and stores the algorithm
 * in _algo. The tic()/tac() pair on `time` encloses kernel construction,
 * algorithm construction and execute().
 */
virtual void runAlgo() {
time.tic();
const MatrixKernelClass MatrixKernel;
KernelClass kernels(_tree.getHeight(), _loader.getBoxWidth(), _loader.getCenterOfBox(),&MatrixKernel);
// NOTE(review): `kernels` and `MatrixKernel` are locals whose addresses are
// handed out here. If FmmClass keeps the kernel pointer, _algo refers to a
// destroyed object once this function returns - confirm that later uses of
// *_algo (e.g. in finalize()) never reach the kernel.
// NOTE(review): a second call would overwrite _algo without deleting the
// previous instance - presumably runAlgo() is called once per test; verify.
_algo = new FmmClass(&_tree, &kernels,_ompStaticScheduling);
_algo->execute();
time.tac();
}
/**
 * Forwards the tree, the algorithm and the loader to
 * AbstractPerfTest::finalize<LeafClass>.
 * Dereferences _algo, so it must point to a valid algorithm (set in runAlgo()).
 */
void finalize() {
AbstractPerfTest::finalize<LeafClass>(_tree, *_algo, _loader);
}
};
template <>
class PerfTest<FFmmAlgorithmTask> : public AbstractPerfTest {
public: // typedefs
using CellClass = FChebCell<FReal, ORDER>;
using ContainerClass = FP2PParticleContainerIndexed<FReal>;
using LeafClass = FSimpleLeaf<FReal, ContainerClass >;
using OctreeClass = FOctree<FReal, CellClass, ContainerClass, LeafClass>;
using MatrixKernelClass = FInterpMatrixKernelR<FReal>;
using KernelClass = FChebSymKernel <FReal, CellClass, ContainerClass,
MatrixKernelClass, ORDER>;
using FmmClass = FFmmAlgorithmTask<OctreeClass, CellClass, ContainerClass, KernelClass, LeafClass>;
protected:
int _nbThreads;
FFmaGenericLoader<FReal> _loader;
......@@ -221,9 +279,11 @@ protected:
/**
 * Forwards the tree, the algorithm and the loader to
 * AbstractPerfTest::finalize<LeafClass>.
 * Dereferences _algo, so it must point to a valid algorithm when this is called.
 */
void finalize() {
AbstractPerfTest::finalize<LeafClass>(_tree, *_algo, _loader);
}
}
};
template <>
class PerfTest<FFmmAlgorithmThreadBalanced> : public AbstractPerfTest {
public: // typedefs
......@@ -316,7 +376,7 @@ int main(int argc, char* argv[])
FParameterDefinitions::OctreeHeight,
FParameterDefinitions::OctreeSubHeight,
FParameterDefinitions::NbThreads,
{{"--algo"},"Algorithm to run (costzones, basic)"});
{{"--algo"},"Algorithm to run (costzones, basic-static, basic-dynamic, task)"});
const std::string defaultFile("../Data/unitCubeXYZQ100.bfma" );
const std::string filename =
......@@ -334,11 +394,21 @@ int main(int argc, char* argv[])
<< "(" << SubTreeHeight << ") algo: " << algoChoice << std::endl;
if(algoChoice == "costzones") {
PerfTest<FFmmAlgorithmThreadBalanced> balancePerfTest(filename, NbThreads, TreeHeight, SubTreeHeight);
PerfTest<FFmmAlgorithmThreadBalanced>
balancePerfTest(filename, NbThreads, TreeHeight, SubTreeHeight);
balancePerfTest.run();
} else if (algoChoice == "basic") {
PerfTest<FFmmAlgorithmThread> threadPerfTest(filename, NbThreads, TreeHeight, SubTreeHeight);
threadPerfTest.run();
} else if (algoChoice == "basic-static") {
PerfTest<FFmmAlgorithmThread>
threadPerfTestStatic(filename, NbThreads, TreeHeight, SubTreeHeight, true);
threadPerfTestStatic.run();
} else if (algoChoice == "basic-dynamic") {
PerfTest<FFmmAlgorithmThread>
threadPerfTestDynamic(filename, NbThreads, TreeHeight, SubTreeHeight,false);
threadPerfTestDynamic.run();
} else if (algoChoice == "task") {
PerfTest<FFmmAlgorithmTask>
taskPerfTest(filename, NbThreads, TreeHeight, SubTreeHeight);
taskPerfTest.run();
} else {
// Keep this hint in sync with the values advertised in the --algo help text.
std::cerr << "Wrong algorithm choice. Try 'costzones', 'basic-static', 'basic-dynamic' or 'task'." << std::endl;
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment