Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
ScalFMM
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
5
Issues
5
List
Boards
Labels
Service Desk
Milestones
Operations
Operations
Incidents
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
solverstack
ScalFMM
Commits
6892f7f4
Commit
6892f7f4
authored
Apr 27, 2015
by
PIACIBELLO Cyrille
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master' of
git+ssh://scm.gforge.inria.fr//gitroot/scalfmm/scalfmm
parents
611d30bf
d2cff93f
Changes
20
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
914 additions
and
126 deletions
+914
-126
CMakeModules/morse/find/FindBLAS.cmake
CMakeModules/morse/find/FindBLAS.cmake
+13
-0
CMakeModules/morse/find/FindCBLAS.cmake
CMakeModules/morse/find/FindCBLAS.cmake
+14
-5
CMakeModules/morse/find/FindSTARPU.cmake
CMakeModules/morse/find/FindSTARPU.cmake
+6
-1
Src/BalanceTree/FCostCell.hpp
Src/BalanceTree/FCostCell.hpp
+12
-1
Src/Core/FFmmAlgorithmTask.hpp
Src/Core/FFmmAlgorithmTask.hpp
+25
-10
Src/Core/FFmmAlgorithmThread.hpp
Src/Core/FFmmAlgorithmThread.hpp
+1
-0
Src/GroupTree/Core/FGroupOfCellsDyn.hpp
Src/GroupTree/Core/FGroupOfCellsDyn.hpp
+14
-8
Src/GroupTree/Core/FGroupOfParticlesDyn.hpp
Src/GroupTree/Core/FGroupOfParticlesDyn.hpp
+18
-0
Src/GroupTree/Core/FGroupTreeDyn.hpp
Src/GroupTree/Core/FGroupTreeDyn.hpp
+89
-64
Src/GroupTree/StarPUUtils/FStarPUFmmPriorities.hpp
Src/GroupTree/StarPUUtils/FStarPUFmmPriorities.hpp
+4
-5
Src/GroupTree/StarPUUtils/FStarPUHeteoprio.hpp
Src/GroupTree/StarPUUtils/FStarPUHeteoprio.hpp
+50
-20
Tests/noDist/AlgoLoaderCostZones.hpp
Tests/noDist/AlgoLoaderCostZones.hpp
+82
-0
Tests/noDist/AlgoLoaderTask.hpp
Tests/noDist/AlgoLoaderTask.hpp
+45
-0
Tests/noDist/AlgoLoaderThread.hpp
Tests/noDist/AlgoLoaderThread.hpp
+42
-0
Tests/noDist/BalancePerfTest.cpp
Tests/noDist/BalancePerfTest.cpp
+77
-7
Tests/noDist/KernelLoaderFChebSym.hpp
Tests/noDist/KernelLoaderFChebSym.hpp
+67
-0
Tests/noDist/PerfTest.cpp
Tests/noDist/PerfTest.cpp
+85
-0
Tests/noDist/PerfTestUtils.hpp
Tests/noDist/PerfTestUtils.hpp
+175
-0
Tests/noDist/TreeLoaderFCheb.hpp
Tests/noDist/TreeLoaderFCheb.hpp
+48
-0
Tests/noDist/testBlockedAlgorithmDyn.cpp
Tests/noDist/testBlockedAlgorithmDyn.cpp
+47
-5
No files found.
CMakeModules/morse/find/FindBLAS.cmake
View file @
6892f7f4
...
...
@@ -47,6 +47,19 @@
## Intel10_64lp_seq (intel mkl v10 64 bit,sequential code, lp64 model),
## Intel( older versions of mkl 32 and 64 bit), ACML,ACML_MP,ACML_GPU,Apple, NAS, Generic
# C/CXX should be enabled to use Intel mkl
###
# We handle different modes to find the dependency
#
# - Detection if already installed on the system
# - BLAS libraries can be detected in different ways
# Here is the order of precedence:
# 1) we look in the cmake variable BLAS_LIBDIR or BLAS_DIR (we guess the libdirs) if defined
# 2) we look in the environment variable BLAS_LIBDIR or BLAS_DIR (we guess the libdirs) if defined
# 3) we look in common environment variables depending on the system (INCLUDE, C_INCLUDE_PATH, CPATH - LIB, DYLD_LIBRARY_PATH, LD_LIBRARY_PATH)
# 4) we look in common system paths depending on the system, see for example paths contained in the following cmake variables:
# - CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES, CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES
# - CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES, CMAKE_C_IMPLICIT_LINK_DIRECTORIES
#
#=============================================================================
# Copyright 2007-2009 Kitware, Inc.
...
...
CMakeModules/morse/find/FindCBLAS.cmake
View file @
6892f7f4
...
...
@@ -48,6 +48,19 @@
# look for a stand alone cblas, please add the following in your
# CMakeLists.txt before calling find_package(CBLAS):
# set(CBLAS_STANDALONE TRUE)
###
# We handle different modes to find the dependency
#
# - Detection if already installed on the system
# - CBLAS libraries can be detected in different ways
# Here is the order of precedence:
# 1) we look in the cmake variable CBLAS_LIBDIR or CBLAS_DIR (we guess the libdirs) if defined
# 2) we look in the environment variable CBLAS_LIBDIR or CBLAS_DIR (we guess the libdirs) if defined
# 3) we look in common environment variables depending on the system (INCLUDE, C_INCLUDE_PATH, CPATH - LIB, DYLD_LIBRARY_PATH, LD_LIBRARY_PATH)
# 4) we look in common system paths depending on the system, see for example paths contained in the following cmake variables:
# - CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES, CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES
# - CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES, CMAKE_C_IMPLICIT_LINK_DIRECTORIES
#
#=============================================================================
# Copyright 2012-2013 Inria
...
...
@@ -80,11 +93,7 @@ endif()
if
(
CBLAS_FIND_COMPONENTS
)
foreach
(
component
${
CBLAS_FIND_COMPONENTS
}
)
if
(
CBLAS_FIND_REQUIRED_
${
component
}
)
if
(
CBLAS_FIND_REQUIRED
)
find_package
(
${
component
}
REQUIRED
)
else
()
find_package
(
${
component
}
)
endif
()
find_package
(
${
component
}
REQUIRED
)
else
()
find_package
(
${
component
}
)
endif
()
...
...
CMakeModules/morse/find/FindSTARPU.cmake
View file @
6892f7f4
...
...
@@ -310,7 +310,12 @@ if( (NOT PKG_CONFIG_EXECUTABLE) OR (PKG_CONFIG_EXECUTABLE AND NOT STARPU_FOUND)
set
(
STARPU_
${
starpu_hdr
}
_INCLUDE_DIRS
"STARPU_
${
starpu_hdr
}
_INCLUDE_DIRS-NOTFOUND"
)
find_path
(
STARPU_
${
starpu_hdr
}
_INCLUDE_DIRS
NAMES
${
starpu_hdr
}
HINTS
${
_inc_env
}
)
HINTS
${
_inc_env
}
PATH_SUFFIXES
"starpu/1.0"
"starpu/1.1"
"starpu/1.2"
"starpu/1.3"
)
endforeach
()
endif
()
endif
()
...
...
Src/BalanceTree/FCostCell.hpp
View file @
6892f7f4
...
...
@@ -7,6 +7,17 @@
#include <type_traits>
/**
* \brief Empty tag class marking cell types that have FCostCell as a base.
* \author Quentin Khan
*
* This class carries no data and no behaviour. It exists so that generic
* code can check whether a cell class has FCostCell in its inheritance
* tree (for instance with std::is_base_of) without having to name the
* template arguments of FCostCell.
*/
class
FCostCellTypeTrait
{};
/**
* \brief Cell with a cost memory for balance computations.
* \author Quentin Khan
...
...
@@ -18,7 +29,7 @@
* \tparam CostType The type to use in order to store the cost. Defaults to FSize.
*/
template
<
typename
BaseClass
,
typename
CostType
=
FSize
>
class
FCostCell
:
public
BaseClass
{
class
FCostCell
:
public
BaseClass
,
virtual
public
FCostCellTypeTrait
{
static_assert
(
std
::
is_arithmetic
<
CostType
>::
value
,
"The cell cost type must be an arithmetic type."
);
...
...
Src/Core/FFmmAlgorithmTask.hpp
View file @
6892f7f4
...
...
@@ -40,7 +40,7 @@
* Of course this class does not deallocate the pointers given as arguments.
*/
template
<
class
OctreeClass
,
class
CellClass
,
class
ContainerClass
,
class
KernelClass
,
class
LeafClass
>
class
FFmmAlgorithmTask
:
public
FAbstractAlgorithm
{
class
FFmmAlgorithmTask
:
public
FAbstractAlgorithm
,
public
FAlgorithmTimers
{
OctreeClass
*
const
tree
;
//< The octree to work on
KernelClass
**
kernels
;
//< The kernels
...
...
@@ -92,15 +92,30 @@ protected:
*/
/**
 * Run the requested FMM passes, timing each one.
 *
 * Each pass is bracketed by the tic()/tac() of its entry in Timers. The
 * timer is started and stopped even when the corresponding pass is not
 * requested, in which case it measures only the test of the flag.
 *
 * Every pass is executed at most once: the untimed calls that used to
 * precede the timed ones have been removed, otherwise each requested
 * pass would run twice.
 *
 * \param operationsToProceed Bit mask of FFmmP2M, FFmmM2M, FFmmM2L,
 *        FFmmL2L, FFmmP2P and FFmmL2P selecting the passes to run.
 */
void executeCore(const unsigned operationsToProceed) override {
    Timers[P2MTimer].tic();
    if(operationsToProceed & FFmmP2M) bottomPass();
    Timers[P2MTimer].tac();

    Timers[M2MTimer].tic();
    if(operationsToProceed & FFmmM2M) upwardPass();
    Timers[M2MTimer].tac();

    Timers[M2LTimer].tic();
    if(operationsToProceed & FFmmM2L) transferPass();
    Timers[M2LTimer].tac();

    Timers[L2LTimer].tic();
    if(operationsToProceed & FFmmL2L) downardPass();
    Timers[L2LTimer].tac();

    // P2P and L2P are handled by the same pass; each flag is forwarded so
    // the pass can enable them independently.
    Timers[NearTimer].tic();
    if( (operationsToProceed & FFmmP2P) || (operationsToProceed & FFmmL2P) )
        directPass((operationsToProceed & FFmmP2P),(operationsToProceed & FFmmL2P));
    Timers[NearTimer].tac();
}
/////////////////////////////////////////////////////////////////////////////
...
...
Src/Core/FFmmAlgorithmThread.hpp
View file @
6892f7f4
...
...
@@ -78,6 +78,7 @@ public:
* The constructor needs the octree and the kernels used for computation.
* \param inTree the octree to work on.
* \param inKernels the kernels to call.
* \param inStaticSchedule Whether to use static or dynamic OpenMP scheduling.
*
* \except An exception is thrown if one of the arguments is NULL.
*/
...
...
Src/GroupTree/Core/FGroupOfCellsDyn.hpp
View file @
6892f7f4
...
...
@@ -177,8 +177,8 @@ public:
cellMultipoles
=
(
unsigned
char
*
)
FAlignedMemory
::
AllocateBytes
<
32
>
(
inNumberOfCells
*
cellSizes
->
poleCellClassSize
);
memset
(
cellMultipoles
,
0
,
inNumberOfCells
*
cellSizes
->
poleCellClassSize
);
cellLocals
=
(
unsigned
char
*
)
FAlignedMemory
::
AllocateBytes
<
32
>
(
inNumberOfCells
*
cellSizes
->
pole
CellClassSize
);
memset
(
cellLocals
,
0
,
inNumberOfCells
*
cellSizes
->
pole
CellClassSize
);
cellLocals
=
(
unsigned
char
*
)
FAlignedMemory
::
AllocateBytes
<
32
>
(
inNumberOfCells
*
cellSizes
->
local
CellClassSize
);
memset
(
cellLocals
,
0
,
inNumberOfCells
*
cellSizes
->
local
CellClassSize
);
// Set all index to not used
for
(
int
idxCellPtr
=
0
;
idxCellPtr
<
blockIndexesTableSize
;
++
idxCellPtr
){
...
...
@@ -314,15 +314,21 @@ public:
}
/** Allocate a new cell by calling its constructor */
template
<
typename
...
CellConstructorParams
>
void
newCell
(
const
MortonIndex
inIndex
,
const
int
id
,
CellConstructorParams
...
args
){
void
newCell
(
const
MortonIndex
inIndex
,
const
int
id
,
std
::
function
<
void
(
const
MortonIndex
mindex
,
unsigned
char
*
symbBuff
,
const
size_t
symbSize
,
unsigned
char
*
upBuff
,
const
size_t
upSize
,
unsigned
char
*
downBuff
,
const
size_t
downSize
,
const
int
level
)
>
BuildCellFunc
,
const
int
inLevel
){
FAssertLF
(
isInside
(
inIndex
));
FAssertLF
(
!
exists
(
inIndex
));
FAssertLF
(
id
<
blockHeader
->
blockIndexesTableSize
);
CompositeCellClass
cell
(
&
blockCells
[
id
*
cellSizes
->
symbCellClassSize
],
&
cellMultipoles
[
id
*
cellSizes
->
poleCellClassSize
],
&
cellLocals
[
id
*
cellSizes
->
localCellClassSize
]);
cell
.
init
(
args
...);
BuildCellFunc
(
inIndex
,
&
blockCells
[
id
*
cellSizes
->
symbCellClassSize
],
cellSizes
->
symbCellClassSize
,
&
cellMultipoles
[
id
*
cellSizes
->
poleCellClassSize
],
cellSizes
->
poleCellClassSize
,
&
cellLocals
[
id
*
cellSizes
->
localCellClassSize
],
cellSizes
->
localCellClassSize
,
inLevel
);
blockIndexesTable
[
inIndex
-
blockHeader
->
startingIndex
]
=
id
;
}
...
...
Src/GroupTree/Core/FGroupOfParticlesDyn.hpp
View file @
6892f7f4
...
...
@@ -266,6 +266,24 @@ public:
}
return
ParticlesAttachedClass
();
}
/**
 * Return the symbolic buffer of a leaf.
 *
 * \param leafIndex Morton index of the leaf; it is used, relative to
 *        blockHeader->startingIndex, to index blockIndexesTable without
 *        any range check.
 * \return A pointer inside symbPart at the offset stored in the leaf
 *         header, or nullptr when the table entry is LeafIsEmptyFlag.
 */
unsigned char* getLeafSymbBuffer(const MortonIndex leafIndex){
    const auto tableEntry = blockIndexesTable[leafIndex - blockHeader->startingIndex];
    if(tableEntry == LeafIsEmptyFlag){
        return nullptr;
    }
    const int leafPosition = tableEntry;
    return symbPart + leafHeader[leafPosition].offSetSymb;
}
/**
 * Return the "down" buffer of a leaf.
 *
 * \param leafIndex Morton index of the leaf; it is used, relative to
 *        blockHeader->startingIndex, to index blockIndexesTable without
 *        any range check.
 * \return A pointer inside downPart at the offset stored in the leaf
 *         header, or nullptr when the table entry is LeafIsEmptyFlag or
 *         when downPart itself is null.
 */
unsigned char* getLeafDownBuffer(const MortonIndex leafIndex){
    const auto tableEntry = blockIndexesTable[leafIndex - blockHeader->startingIndex];
    if(tableEntry == LeafIsEmptyFlag){
        return nullptr;
    }
    const int leafPosition = tableEntry;
    if(!downPart){
        return nullptr;
    }
    return downPart + leafHeader[leafPosition].offSetDown;
}
};
#endif // FGROUPOFPARTICLESDYN_HPP
...
...
Src/GroupTree/Core/FGroupTreeDyn.hpp
View file @
6892f7f4
This diff is collapsed.
Click to expand it.
Src/GroupTree/StarPUUtils/FStarPUFmmPriorities.hpp
View file @
6892f7f4
...
...
@@ -45,17 +45,16 @@ public:
return
controller
;
}
/**
 * Trampoline handed to the heteroprio scheduler.
 *
 * The scheduler callback type takes the scheduler data as an opaque
 * void*, so the pointer is converted back to the heteroprio structure
 * before being forwarded to the controller instance.
 *
 * \param sched_ctx_id Scheduling context id, forwarded unchanged.
 * \param heteroprio Opaque pointer to the
 *        _starpu_heteroprio_center_policy_heteroprio data.
 */
static void InitSchedulerCallback(unsigned sched_ctx_id, void* heteroprio){
    Controller().initSchedulerCallback(
        sched_ctx_id,
        static_cast<struct _starpu_heteroprio_center_policy_heteroprio*>(heteroprio));
}
void
init
(
struct
starpu_conf
*
conf
,
const
int
inTreeHeight
,
FStarPUKernelCapacities
*
inCapacities
){
capacities
=
inCapacities
;
conf
->
sched_policy
=
&
_starpu_sched_heteroprio_policy
,
initialize_heteroprio_center_policy_callback
=
&
InitSchedulerCallback
;
conf
->
sched_policy
=
&
_starpu_sched_heteroprio_policy
;
starpu_heteroprio_set_callback
(
&
InitSchedulerCallback
)
;
treeHeight
=
inTreeHeight
;
...
...
Src/GroupTree/StarPUUtils/FStarPUHeteoprio.hpp
View file @
6892f7f4
...
...
@@ -59,34 +59,27 @@
* #include "../../Src/GroupTree/StarPUUtils/FStarPUHeteoprio.hpp"
*
* void initSchedulerCallback(unsigned sched_ctx_id,
*
struct _starpu_heteroprio_center_policy_heteroprio *heteroprio){
* struct _starpu_heteroprio_center_policy_heteroprio *heteroprio){
* // CPU uses 3 buckets
*
heteroprio->nb_prio_per_arch_index[FSTARPU_CPU_IDX] = 3
;
*
starpu_heteroprio_set_nb_prios(heteroprio, FSTARPU_CPU_IDX, 3)
;
* // It uses direct mapping idx => idx
* for(unsigned idx = 0 ; idx < 3 ; ++idx){
* heteroprio->prio_mapping_per_arch_index[FSTARPU_CPU_IDX][idx] = idx;
* // We say CPU is faster
* heteroprio->buckets[idx].factor_base_arch_index = FSTARPU_CPU_IDX;
* // We must say that CPU uses these buckets
* heteroprio->buckets[idx].valide_archs |= STARPU_CPU;
* starpu_heteroprio_set_mapping(heteroprio, FSTARPU_CPU_IDX, idx, idx);
* starpu_heteroprio_set_faster_arch(heteroprio, FSTARPU_CPU_IDX, idx);
* }
* #ifdef STARPU_USE_OPENCL
* // OpenCL is enabled and uses 2 buckets
*
heteroprio->nb_prio_per_arch_index[FSTARPU_OPENCL_IDX] = 2
;
*
starpu_heteroprio_set_nb_prios(heteroprio, FSTARPU_OPENCL_IDX, 2)
;
* // OpenCL will first look to priority 2
* heteroprio->prio_mapping_per_arch_index[FSTARPU_OPENCL_IDX][0] = 2;
* // We tell the scheduler that OpenCL uses this bucket
* heteroprio->buckets[2].valide_archs |= STARPU_OPENCL;
* starpu_heteroprio_set_mapping(heteroprio, FSTARPU_OPENCL_IDX, 0, 2);
* // For this bucket OpenCL is the fastest
*
heteroprio->buckets[2].factor_base_arch_index = FSTARPU_OPENCL_IDX
;
*
starpu_heteroprio_set_faster_arch(heteroprio, FSTARPU_OPENCL_IDX, 2)
;
* // And CPU is 4 times slower
*
heteroprio->buckets[2].slow_factors_per_index[FSTARPU_CPU_IDX] = 4.0f
;
*
starpu_heteroprio_set_arch_slow_factor(heteroprio, FSTARPU_CPU_IDX, 2, 4.0f)
;
*
* heteroprio->prio_mapping_per_arch_index[FSTARPU_OPENCL_IDX][1] = 1;
* // We tell the scheduler that OpenCL uses this bucket
* heteroprio->buckets[1].valide_archs |= STARPU_OPENCL;
* // We let the CPU as the fastest PU and tell that OpenCL is 1.7 times slower
* heteroprio->buckets[1].slow_factors_per_index[FSTARPU_OPENCL_IDX] = 1.7f;
* starpu_heteroprio_set_mapping(heteroprio, FSTARPU_OPENCL_IDX, 1, 1);
* // We let the CPU as the fastest and tell that OpenCL is 1.7 times slower
* starpu_heteroprio_set_arch_slow_factor(heteroprio, FSTARPU_OPENCL_IDX, 1, 1.7f);
* #endif
* }
*
...
...
@@ -346,9 +339,46 @@ struct _starpu_heteroprio_center_policy_heteroprio
unsigned
nb_workers_per_arch_index
[
FSTARPU_NB_TYPES
];
};
/********************************************************************************/
/********************************************************************************/
/* This is the callback that must init the scheduler buckets */
/*extern*/
void
(
*
initialize_heteroprio_center_policy_callback
)(
unsigned
sched_ctx_id
,
struct
_starpu_heteroprio_center_policy_heteroprio
*
heteroprio
)
=
NULL
;
typedef
void
(
*
Heteroprio_callback_type
)(
unsigned
sched_ctx_id
,
void
*
heteroprio
);
/*extern*/
Heteroprio_callback_type
initialize_heteroprio_center_policy_callback
=
NULL
;
/**
 * Install the user callback stored in
 * initialize_heteroprio_center_policy_callback.
 *
 * \param user_callback Function to store; it replaces any previous value.
 */
inline void starpu_heteroprio_set_callback(Heteroprio_callback_type user_callback)
{
    initialize_heteroprio_center_policy_callback = user_callback;
}
/**
 * Tell how many priorities there are for a given architecture.
 *
 * \param heterodata Opaque pointer to the
 *        _starpu_heteroprio_center_policy_heteroprio data.
 * \param arch Architecture index to configure.
 * \param max_prio Number of priorities; asserted to be strictly lower
 *        than HETEROPRIO_MAX_PRIO.
 */
inline void starpu_heteroprio_set_nb_prios(void* heterodata, const FStarPUTypes arch, const unsigned max_prio)
{
    assert(max_prio < HETEROPRIO_MAX_PRIO);

    auto* const policy = static_cast<struct _starpu_heteroprio_center_policy_heteroprio*>(heterodata);
    policy->nb_prio_per_arch_index[arch] = max_prio;
}
/**
 * Set, for a given architecture, the mapping priority => bucket.
 *
 * Besides recording the mapping, the architecture is added to the set of
 * architectures allowed to use the destination bucket.
 *
 * \param heterodata Opaque pointer to the
 *        _starpu_heteroprio_center_policy_heteroprio data.
 * \param arch Architecture index to configure.
 * \param source_prio Priority, as seen by this architecture (not checked).
 * \param dest_bucket_id Bucket the priority maps to; asserted to be
 *        strictly lower than HETEROPRIO_MAX_PRIO.
 */
inline void starpu_heteroprio_set_mapping(void* heterodata, const FStarPUTypes arch,
                                          const unsigned source_prio, const unsigned dest_bucket_id)
{
    assert(dest_bucket_id < HETEROPRIO_MAX_PRIO);

    auto* const policy = static_cast<struct _starpu_heteroprio_center_policy_heteroprio*>(heterodata);
    policy->prio_mapping_per_arch_index[arch][source_prio] = dest_bucket_id;
    policy->buckets[dest_bucket_id].valide_archs |= FStarPUTypesToArch[arch];
}
/**
 * Tell which architecture is the fastest for the tasks of a bucket (optional).
 *
 * The architecture becomes the reference of the bucket and its own slow
 * factor in that bucket is reset to 0.
 *
 * \param heterodata Opaque pointer to the
 *        _starpu_heteroprio_center_policy_heteroprio data.
 * \param arch Architecture index that is the fastest for this bucket.
 * \param bucket_id Bucket to configure; asserted to be strictly lower
 *        than HETEROPRIO_MAX_PRIO.
 */
inline void starpu_heteroprio_set_faster_arch(void* heterodata, const FStarPUTypes arch, const unsigned bucket_id)
{
    assert(bucket_id < HETEROPRIO_MAX_PRIO);

    auto* const policy = static_cast<struct _starpu_heteroprio_center_policy_heteroprio*>(heterodata);
    policy->buckets[bucket_id].factor_base_arch_index = arch;
    policy->buckets[bucket_id].slow_factors_per_index[arch] = 0;
}
/**
 * Tell how slow an architecture is for the tasks of a bucket (optional).
 *
 * \param heterodata Opaque pointer to the
 *        _starpu_heteroprio_center_policy_heteroprio data.
 * \param arch Architecture index whose factor is set.
 * \param bucket_id Bucket to configure; asserted to be strictly lower
 *        than HETEROPRIO_MAX_PRIO.
 * \param slow_factor Value stored as the slow factor of arch in the bucket.
 */
inline void starpu_heteroprio_set_arch_slow_factor(void* heterodata, const FStarPUTypes arch,
                                                   const unsigned bucket_id, const float slow_factor)
{
    assert(bucket_id < HETEROPRIO_MAX_PRIO);

    auto* const policy = static_cast<struct _starpu_heteroprio_center_policy_heteroprio*>(heterodata);
    policy->buckets[bucket_id].slow_factors_per_index[arch] = slow_factor;
}
/********************************************************************************/
/********************************************************************************/
/* Init the scheduler - This will call the init callback! */
static
void
initialize_heteroprio_center_policy
(
unsigned
sched_ctx_id
)
...
...
Tests/noDist/AlgoLoaderCostZones.hpp
0 → 100644
View file @
6892f7f4
#ifndef _ALGOLOADERCOSTZONES_HPP_
#define _ALGOLOADERCOSTZONES_HPP_
#include "PerfTestUtils.hpp"
#include "Core/FFmmAlgorithm.hpp"
#include "BalanceTree/FFmmAlgorithmThreadBalanced.hpp"
#include "BalanceTree/FCostCell.hpp"
#include "BalanceTree/FCostZones.hpp"
/**
 * \brief Algorithm loader for the CostZones algorithm.
 *
 * run() first executes an FMM pass with the cost kernel, then builds the
 * cost zones, and finally runs the balanced threaded FMM using the zone
 * bounds that were computed.
 *
 * \warning This loader requires that the kernel loader supply a type
 * definition for a `CostKernelClass` (and the `_costKernel` object used
 * by run()).
 *
 * \tparam TreeLoaderT Tree loader type.
 * \tparam KernelLoaderT Kernel loader template, instantiated with the tree loader.
 */
template <class TreeLoaderT, template <typename> class KernelLoaderT>
class AlgoLoaderCostZones : public FAlgoLoader<TreeLoaderT, KernelLoaderT> {
public:
    // Loader types.
    using TreeLoader   = TreeLoaderT;
    using KernelLoader = KernelLoaderT<TreeLoader>;

    // Types exported by the tree loader.
    using FReal          = typename TreeLoader::FReal;
    using CellClass      = typename TreeLoader::CellClass;
    using ContainerClass = typename TreeLoader::ContainerClass;
    using LeafClass      = typename TreeLoader::LeafClass;
    using OctreeClass    = typename TreeLoader::OctreeClass;

    // Types exported by the kernel loader.
    using KernelClass     = typename KernelLoader::KernelClass;
    using CostKernelClass = typename KernelLoader::CostKernelClass;

    static_assert(std::is_base_of<FCostCellTypeTrait, CellClass>::value,
                  "The tree cells must derive from FCostCell.");

    /// Balanced FMM run on the real kernel.
    using FMMClass = FFmmAlgorithmThreadBalanced
        <OctreeClass, CellClass, ContainerClass, KernelClass, LeafClass>;
    /// Plain FMM run on the cost kernel.
    using CostFmmClass = FFmmAlgorithm
        <OctreeClass, CellClass, ContainerClass, CostKernelClass, LeafClass>;

    TreeLoader&   _treeLoader;   ///< Loader owning the tree (not owned here).
    KernelLoader& _kernelLoader; ///< Loader owning the kernels (not owned here).

    /// Builds the loader; the parameters object is not used.
    AlgoLoaderCostZones(FPerfTestParams& /*params*/,
                        TreeLoader& treeLoader,
                        KernelLoader& kernelLoader)
        : _treeLoader(treeLoader),
          _kernelLoader(kernelLoader)
    {
    }

    /// Computes the tree cells costs then runs the costzones and FMM algorithms.
    void run()
    {
        OctreeClass* const tree = &_treeLoader._tree;
        auto& timer = this->time;

        // Step 1: compute the tree cells costs with the cost kernel.
        CostFmmClass costFmm(tree, &_kernelLoader._costKernel);
        timer.tic();
        costFmm.execute();
        timer.tac();
        std::cout << "Generating tree cost: " << timer.elapsed() << "s.\n";

        // Step 2: build the zones, one per available OpenMP thread.
        FCostZones<OctreeClass, CellClass> zones(tree, omp_get_max_threads());
        timer.tic();
        zones.run();
        timer.tac();
        std::cout << "Generating cost zones: " << timer.elapsed() << "s.\n";

        // Step 3: balanced FMM; the timed section includes the
        // construction of the algorithm object.
        timer.tic();
        FMMClass fmm(tree, &_kernelLoader._kernel,
                     zones.getZoneBounds(), zones.getLeafZoneBounds());
        fmm.execute();
        timer.tac();
    }
};
#endif
Tests/noDist/AlgoLoaderTask.hpp
0 → 100644
View file @
6892f7f4
#ifndef _ALGOLOADERTASK_HPP_
#define _ALGOLOADERTASK_HPP_
#include "PerfTestUtils.hpp"
#include "Core/FFmmAlgorithmTask.hpp"
/**
 * \brief Algorithm loader for FFmmAlgorithmTask.
 *
 * \tparam TreeLoaderT Tree loader type.
 * \tparam KernelLoaderT Kernel loader template, instantiated with the tree loader.
 */
template <class TreeLoaderT, template <typename> class KernelLoaderT>
class AlgoLoaderTask : public FAlgoLoader<TreeLoaderT, KernelLoaderT> {
public:
    // Loader types.
    using TreeLoader   = TreeLoaderT;
    using KernelLoader = KernelLoaderT<TreeLoader>;

    // Types exported by the tree loader.
    using FReal          = typename TreeLoader::FReal;
    using CellClass      = typename TreeLoader::CellClass;
    using ContainerClass = typename TreeLoader::ContainerClass;
    using LeafClass      = typename TreeLoader::LeafClass;
    using OctreeClass    = typename TreeLoader::OctreeClass;

    // Type exported by the kernel loader.
    using KernelClass = typename KernelLoader::KernelClass;

    /// FMM algorithm run by this loader.
    using FMMClass = FFmmAlgorithmTask
        <OctreeClass, CellClass, ContainerClass, KernelClass, LeafClass>;

    TreeLoader&   _treeLoader;   ///< Loader owning the tree (not owned here).
    KernelLoader& _kernelLoader; ///< Loader owning the kernel (not owned here).

    /// Builds the loader; the parameters object is not used.
    AlgoLoaderTask(FPerfTestParams& /*params*/,
                   TreeLoader& treeLoader,
                   KernelLoader& kernelLoader)
        : _treeLoader(treeLoader),
          _kernelLoader(kernelLoader)
    {
    }

    /// Builds the task based FMM on the loaded tree and kernel, then executes it.
    void run()
    {
        FMMClass fmm(&_treeLoader._tree, &_kernelLoader._kernel);
        fmm.execute();
    }
};
#endif
Tests/noDist/AlgoLoaderThread.hpp
0 → 100644
View file @
6892f7f4
#ifndef _ALGOLOADERTHREAD_HPP_
#define _ALGOLOADERTHREAD_HPP_
#include "PerfTestUtils.hpp"
#include "Core/FFmmAlgorithmThread.hpp"
/**
 * \brief Algorithm loader for FFmmAlgorithmThread.
 *
 * \tparam TreeLoaderT Tree loader type.
 * \tparam KernelLoaderT Kernel loader template, instantiated with the tree loader.
 */
template <class TreeLoaderT, template <typename> class KernelLoaderT>
class AlgoLoaderThread : public FAlgoLoader<TreeLoaderT, KernelLoaderT> {
public:
    // Loader types.
    using TreeLoader   = TreeLoaderT;
    using KernelLoader = KernelLoaderT<TreeLoader>;

    // Types exported by the tree loader.
    using FReal          = typename TreeLoader::FReal;
    using CellClass      = typename TreeLoader::CellClass;
    using ContainerClass = typename TreeLoader::ContainerClass;
    using LeafClass      = typename TreeLoader::LeafClass;
    using OctreeClass    = typename TreeLoader::OctreeClass;

    // Type exported by the kernel loader.
    using KernelClass = typename KernelLoader::KernelClass;

    /// FMM algorithm run by this loader.
    using FMMClass = FFmmAlgorithmThread
        <OctreeClass, CellClass, ContainerClass, KernelClass, LeafClass>;

    TreeLoader&   _treeLoader;   ///< Loader owning the tree (not owned here).
    KernelLoader& _kernelLoader; ///< Loader owning the kernel (not owned here).

    /// Builds the loader; the parameters object is not used.
    AlgoLoaderThread(FPerfTestParams& /*params*/,
                     TreeLoader& treeLoader,
                     KernelLoader& kernelLoader)
        : _treeLoader(treeLoader),
          _kernelLoader(kernelLoader)
    {
    }

    /// Builds the threaded FMM on the loaded tree and kernel, then executes it.
    void run()
    {
        // NOTE(review): the third constructor argument is presumably the
        // static/dynamic OpenMP scheduling switch; confirm against
        // FFmmAlgorithmThread.
        FMMClass fmm(&_treeLoader._tree, &_kernelLoader._kernel, false);
        fmm.execute();
    }
};
#endif
Tests/noDist/BalancePerfTest.cpp
View file @
6892f7f4
...
...
@@ -60,6 +60,7 @@
// Algorithms
#include "Core/FFmmAlgorithm.hpp"
#include "Core/FFmmAlgorithmThread.hpp"
#include "Core/FFmmAlgorithmTask.hpp"
#include "BalanceTree/FFmmAlgorithmThreadBalanced.hpp"
#include "BalanceTree/FCostZones.hpp"
...
...
@@ -88,7 +89,7 @@ protected:
template
<
class
OctreeClass
>
void
loadTree
(
FFmaGenericLoader
<
FReal
>&
loader
,
OctreeClass
&
tree
)
{
std
::
cout
<<
"Creating & inserting particles"
;
std
::
cout
<<
"Creating & inserting particles"
<<
std
::
flush
;
time
.
tic
();
...
...
@@ -183,6 +184,63 @@ public: // typedefs
using
FmmClass
=
FFmmAlgorithmThread
<
OctreeClass
,
CellClass
,
ContainerClass
,
KernelClass
,
LeafClass
>
;
protected:
int
_nbThreads
;
FFmaGenericLoader
<
FReal
>
_loader
;
OctreeClass
_tree
;
FmmClass
*
_algo
;
bool
_ompStaticScheduling
;
public:
/**
 * Build the test: open the particle file and create the (still empty) tree.
 *
 * The algorithm object is only created by runAlgo(); _algo is explicitly
 * set to nullptr here so that the destructor's null test is meaningful
 * when runAlgo() was never called (it used to be left uninitialised).
 *
 * \param fileName Particle file given to the loader.
 * \param nbThreads Number of OpenMP threads requested in setup().
 * \param treeHeight Height of the octree.
 * \param subTreeHeight Height of the octree sub-trees.
 * \param ompStaticScheduling Scheduling flag forwarded to the algorithm.
 */
PerfTest(const std::string& fileName, const int nbThreads,
         const int treeHeight, const int subTreeHeight,
         bool ompStaticScheduling)
    : _nbThreads(nbThreads),
      _loader(fileName),
      _tree(treeHeight, subTreeHeight, _loader.getBoxWidth(), _loader.getCenterOfBox()),
      _algo(nullptr),
      _ompStaticScheduling(ompStaticScheduling)
{
}
/** Release the algorithm object created by runAlgo(), if any. */
~PerfTest()
{
    // Deleting a null pointer is a no-op, so no explicit test is needed.
    delete _algo;
}
protected:
/**
 * Set the OpenMP thread count, report the value OpenMP will use, then
 * load the particles into the tree.
 */
virtual void setup()
{
    omp_set_num_threads(_nbThreads);

    const int maxThreads = omp_get_max_threads();
    std::cout << "\n>> Using " << maxThreads << " threads.\n" << std::endl;

    loadTree(_loader, _tree);
}
/**
 * Build the kernel and the FMM algorithm, then execute it.
 *
 * The timed section covers the kernel construction, the algorithm
 * construction and the execution.
 */
virtual void runAlgo()
{
    time.tic();

    const MatrixKernelClass matrixKernel;
    KernelClass fmmKernel(_tree.getHeight(),
                          _loader.getBoxWidth(),
                          _loader.getCenterOfBox(),
                          &matrixKernel);

    // NOTE(review): fmmKernel is a local object but _algo is given its
    // address and outlives this function - confirm nothing reaches the
    // kernel through _algo after runAlgo() returns.
    _algo = new FmmClass(&_tree, &fmmKernel, _ompStaticScheduling);
    _algo->execute();

    time.tac();
}
/**
 * Forward to the generic AbstractPerfTest::finalize with this test's leaf
 * type, tree, algorithm and loader. Dereferences _algo, so runAlgo() must
 * have been called before.
 */
void finalize()
{
    AbstractPerfTest::finalize<LeafClass>(_tree, *_algo, _loader);
}
};
template
<
>
class
PerfTest
<
FFmmAlgorithmTask
>
:
public
AbstractPerfTest
{
public:
// typedefs
using
CellClass
=
FChebCell
<
FReal
,
ORDER
>
;
using
ContainerClass
=
FP2PParticleContainerIndexed
<
FReal
>
;
using
LeafClass
=
FSimpleLeaf
<
FReal
,
ContainerClass
>
;
using
OctreeClass
=
FOctree
<
FReal
,
CellClass
,
ContainerClass
,
LeafClass
>
;
using
MatrixKernelClass
=
FInterpMatrixKernelR
<
FReal
>
;
using
KernelClass
=
FChebSymKernel
<
FReal
,
CellClass
,
ContainerClass
,
MatrixKernelClass
,
ORDER
>
;
using
FmmClass
=
FFmmAlgorithmTask
<
OctreeClass
,
CellClass
,
ContainerClass
,
KernelClass
,
LeafClass
>
;
protected:
int
_nbThreads
;
FFmaGenericLoader
<
FReal
>
_loader
;
...
...
@@ -221,9 +279,11 @@ protected:
/**
 * Forward to the generic AbstractPerfTest::finalize with this test's leaf
 * type, tree, algorithm and loader. Dereferences _algo, so the algorithm
 * must have been created before.
 */
void finalize()
{
    AbstractPerfTest::finalize<LeafClass>(_tree, *_algo, _loader);
}
}
};
template
<
>
class
PerfTest
<
FFmmAlgorithmThreadBalanced
>
:
public
AbstractPerfTest
{