Mentions légales du service
Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
ScalFMM
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
solverstack
ScalFMM
Commits
53a0afb5
Commit
53a0afb5
authored
10 years ago
by
BRAMAS Berenger
Browse files
Options
Downloads
Patches
Plain Diff
Add a balance FMM algo - kernel independant
parent
e0898b5b
Branches
Branches containing commit
Tags
Tags containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
Src/Core/FFmmAlgorithmThreadBalance.hpp
+609
-0
609 additions, 0 deletions
Src/Core/FFmmAlgorithmThreadBalance.hpp
with
609 additions
and
0 deletions
Src/Core/FFmmAlgorithmThreadBalance.hpp
0 → 100644
+
609
−
0
View file @
53a0afb5
#ifndef FFmmAlgorithmThreadBalanceBALANCE_HPP
#define FFmmAlgorithmThreadBalanceBALANCE_HPP
#include
"../Utils/FAssert.hpp"
#include
"../Utils/FLog.hpp"
#include
"../Utils/FTic.hpp"
#include
"../Utils/FGlobal.hpp"
#include
"Utils/FAlgorithmTimers.hpp"
#include
"../Containers/FOctree.hpp"
#include
"FCoreCommon.hpp"
#include
<omp.h>
#include
<vector>
#include
<memory>
/**
 * \author Berenger Bramas (berenger.bramas@inria.fr)
 * \brief Implements an FMM algorithm threaded using OpenMP.
 *
 * Please read the license
 *
 * This class runs a threaded FMM algorithm.
 * It balances the execution between threads: per-thread work intervals are
 * precomputed for each pass (see buildThreadIntervals).
 *
 * When using this algorithm the P2P is thread safe.
 *
 * This class does not deallocate pointers given to its constructor.
 */
template <class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass>
class FFmmAlgorithmThreadBalance : public FAbstractAlgorithm, public FAlgorithmTimers {
    OctreeClass* const tree;   ///< The octree to work on.
    KernelClass** kernels;     ///< The kernels (one private copy per thread, allocated in the constructor).

    /// Number of leaf "shapes" (colors) derived from the leaf coordinates
    /// modulo 3 on each axis: (x%3)*9 + (y%3)*3 + (z%3).
    static const int SizeShape = 3*3*3;

    const int MaxThreads;      ///< The maximum number of threads.
    const int OctreeHeight;    ///< The height of the given tree.
    /// Separation criteria forwarded to getInteractionNeighbors at the leaf level (M2L).
    const int leafLevelSeperationCriteria;

public:
    /** Class constructor
     *
     * The constructor needs the octree and the kernels used for computation.
     * \param inTree the octree to work on.
     * \param inKernels the kernels to call.
     * \param inUserChunkSize to specify the chunk size in the loops (-1 is static, 0 is N/p^2,
     *        otherwise it is directly used as the number of items to proceed together), default is 10.
     *        NOTE(review): this parameter is not referenced anywhere in this class body — confirm intent.
     * \param inLeafLevelSeperationCriteria separation criteria used for the M2L at the leaf level.
     *
     * \except An exception is thrown if one of the arguments is NULL.
     */
    FFmmAlgorithmThreadBalance(OctreeClass* const inTree, KernelClass* const inKernels,
                               const int inUserChunkSize = 10, const int inLeafLevelSeperationCriteria = 1)
        : tree(inTree), kernels(nullptr),
          MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()),
          leafLevelSeperationCriteria(inLeafLevelSeperationCriteria) {
        FAssertLF(tree, "tree cannot be null");

        // One kernel copy per thread.
        this->kernels = new KernelClass*[MaxThreads];
        // Each thread copy-constructs its own kernel; the named critical section
        // serializes the copies (presumably the KernelClass copy is not thread
        // safe — TODO confirm) while the static parallel-for keeps each copy
        // allocated by the thread that will use it.
        #pragma omp parallel for schedule(static)
        for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
            #pragma omp critical (InitFFmmAlgorithmThreadBalance)
            {
                this->kernels[idxThread] = new KernelClass(*inKernels);
            }
        }

        FAbstractAlgorithm::setNbLevelsInTree(OctreeHeight);

        // Precompute the per-thread work intervals for every pass.
        buildThreadIntervals();

        FLOG(FLog::Controller << "FFmmAlgorithmThreadBalance (Max Thread " << omp_get_max_threads() << ")\n");
    }
/** Default destructor */
virtual
~
FFmmAlgorithmThreadBalance
(){
for
(
int
idxThread
=
0
;
idxThread
<
MaxThreads
;
++
idxThread
){
delete
this
->
kernels
[
idxThread
];
}
delete
[]
this
->
kernels
;
}
/**
* Runs the complete algorithm.
*/
void
executeCore
(
const
unsigned
operationsToProceed
)
override
{
Timers
[
P2MTimer
].
tic
();
if
(
operationsToProceed
&
FFmmP2M
)
bottomPass
();
Timers
[
P2MTimer
].
tac
();
Timers
[
M2MTimer
].
tic
();
if
(
operationsToProceed
&
FFmmM2M
)
upwardPass
();
Timers
[
M2MTimer
].
tac
();
Timers
[
M2LTimer
].
tic
();
if
(
operationsToProceed
&
FFmmM2L
)
transferPass
();
Timers
[
M2LTimer
].
tac
();
Timers
[
L2LTimer
].
tic
();
if
(
operationsToProceed
&
FFmmL2L
)
downardPass
();
Timers
[
L2LTimer
].
tac
();
Timers
[
NearTimer
].
tic
();
if
(
operationsToProceed
&
FFmmL2P
)
L2P
();
if
(
operationsToProceed
&
FFmmP2P
)
directPass
();
Timers
[
NearTimer
].
tac
();
}
protected:

    /////////////////////////////////////////////////////////////////////////////
    // P2M
    /////////////////////////////////////////////////////////////////////////////

    /** The workload contains what a thread needs to perform its interval of work:
     *  the iterator positioned on its first element and the number of elements. */
    struct Workload{
        typename OctreeClass::Iterator iterator; // First element of the thread's interval
        int nbElements;                          // Number of consecutive elements to process
    };

    //< The work per thread for the P2M
    std::vector<Workload> workloadP2M;
    //< The work per level and per thread for the M2M
    std::vector<std::vector<Workload>> workloadM2M;
    //< The work per level and per thread for the M2L
    std::vector<std::vector<Workload>> workloadM2L;
    //< The work per level and per thread for the L2L
    std::vector<std::vector<Workload>> workloadL2L;
    //< The work per thread for the L2P
    std::vector<Workload> workloadL2P;
    //< The work per shape and per thread for the P2P:
    //< workloadP2P[shape][thread] = [first, second) index range into leafsDataArray
    std::vector<std::vector<std::pair<int,int>>> workloadP2P;

    /** This structure is needed by the thread for the P2P because of the colors:
     *  leaves are stored grouped by shape, so direct octree iteration cannot be used. */
    struct LeafData{
        MortonIndex index;        // Morton index of the leaf
        FTreeCoordinate coord;    // Coordinate of the leaf
        ContainerClass* targets;  // Target particles of the leaf
        ContainerClass* sources;  // Source particles of the leaf
    };
    /** Direct access to the data for the P2P (filled shape by shape in buildThreadIntervals) */
    std::unique_ptr<LeafData[]> leafsDataArray;

    /** This struct is used during the preparation of the intervals:
     *  one entry per element with its estimated cost. */
    struct WorkloadTemp{
        typename OctreeClass::Iterator iterator; // Position of the element in the tree
        FSize amountOfWork;                      // Estimated cost for this element
    };
    /** From a vector of work (workPerElement) generate the per-thread intervals.
     *
     * Performs a greedy split: elements are assigned in order to the current
     * thread until adding the next element would move the cumulative work
     * further from the thread's ideal target ((idxThread+1)*idealWork) than
     * leaving it out, at which point the next thread starts. The last thread
     * absorbs any remainder.
     *
     * \param intervals output, resized to MaxThreads; entry i holds the first
     *        iterator and the count of elements for thread i.
     * \param totalWork sum of amountOfWork over all elements.
     * \param workPerElement the elements (iterator + cost), in tree order.
     * \param nbElements number of valid entries in workPerElement.
     *        NOTE(review): workPerElement[0] is read unconditionally, so the
     *        caller must pass at least one element.
     */
    void generateIntervalFromWorkload(std::vector<Workload>* intervals, const FSize totalWork,
                                      WorkloadTemp* workPerElement, const FSize nbElements) const {
        // Now split between thread
        (*intervals).resize(MaxThreads);

        // Ideally each thread will have this
        const FSize idealWork = (totalWork/MaxThreads);

        // Assign default value for first thread
        int idxThread = 0;
        (*intervals)[idxThread].iterator = workPerElement[0].iterator;
        (*intervals)[idxThread].nbElements = 1;
        FSize assignWork = workPerElement[0].amountOfWork;

        for(int idxElement = 1 ; idxElement < nbElements ; ++idxElement){
            // is it more balance if we add the current element to the current thread
            if(FMath::Abs((idxThread+1)*idealWork - assignWork) <
                    FMath::Abs((idxThread+1)*idealWork - assignWork - workPerElement[idxElement].amountOfWork)
                    && idxThread != MaxThreads-1){
                /// FLOG(FLog::Controller << "[Balance] Shape Thread " << idxThread << " goes from "
                /// << (*intervals)[idxThread].iterator.getCurrentGlobalIndex() << " nb " << (*intervals)[idxThread].nbElements << "\n");
                // if not start filling the next thread
                idxThread += 1;
                (*intervals)[idxThread].iterator = workPerElement[idxElement].iterator;
                (*intervals)[idxThread].nbElements = 0;
            }
            (*intervals)[idxThread].nbElements += 1;
            assignWork += workPerElement[idxElement].amountOfWork;
        }
        /// FLOG(FLog::Controller << "[Balance] Shape Thread " << idxThread << " goes from "
        /// << (*intervals)[idxThread].iterator.getCurrentGlobalIndex() << " nb " << (*intervals)[idxThread].nbElements << "\n");
    }
void
buildThreadIntervals
(){
// Reset the vectors
workloadP2M
.
clear
();
workloadM2M
.
clear
();
workloadM2L
.
clear
();
workloadL2L
.
clear
();
workloadL2P
.
clear
();
workloadP2P
.
clear
();
// Count the number of leaves and color elements
int
shapeLeaves
[
SizeShape
]
=
{
0
};
int
leafsNumber
=
0
;
{
typename
OctreeClass
::
Iterator
octreeIterator
(
tree
);
octreeIterator
.
gotoBottomLeft
();
do
{
++
leafsNumber
;
const
FTreeCoordinate
&
coord
=
octreeIterator
.
getCurrentCell
()
->
getCoordinate
();
++
shapeLeaves
[(
coord
.
getX
()
%
3
)
*
9
+
(
coord
.
getY
()
%
3
)
*
3
+
(
coord
.
getZ
()
%
3
)];
}
while
(
octreeIterator
.
moveRight
());
}
// Allocate the working buffer
std
::
unique_ptr
<
WorkloadTemp
[]
>
workloadBuffer
(
new
WorkloadTemp
[
leafsNumber
]);
{
// Prepare P2M
/// FLOG(FLog::Controller << "[Balance] P2M:\n");
typename
OctreeClass
::
Iterator
octreeIterator
(
tree
);
octreeIterator
.
gotoBottomLeft
();
FSize
idxLeaf
=
0
;
FSize
totalWork
=
0
;
do
{
// Keep track of tree iterator
workloadBuffer
[
idxLeaf
].
iterator
=
octreeIterator
;
// Count the nb of particles as amount of work in the leaf
workloadBuffer
[
idxLeaf
].
amountOfWork
=
octreeIterator
.
getCurrentListSrc
()
->
getNbParticles
();
// Keep the total amount of work
totalWork
+=
workloadBuffer
[
idxLeaf
].
amountOfWork
;
++
idxLeaf
;
}
while
(
octreeIterator
.
moveRight
());
generateIntervalFromWorkload
(
&
workloadP2M
,
totalWork
,
workloadBuffer
.
get
(),
idxLeaf
);
}
{
// Prepare L2P
/// FLOG(FLog::Controller << "[Balance] L2P:\n");
typename
OctreeClass
::
Iterator
octreeIterator
(
tree
);
octreeIterator
.
gotoBottomLeft
();
FSize
idxLeaf
=
0
;
FSize
totalWork
=
0
;
do
{
// Keep track of tree iterator
workloadBuffer
[
idxLeaf
].
iterator
=
octreeIterator
;
// Count the nb of particles as amount of work in the leaf
workloadBuffer
[
idxLeaf
].
amountOfWork
=
octreeIterator
.
getCurrentListTargets
()
->
getNbParticles
();
// Keep the total amount of work
totalWork
+=
workloadBuffer
[
idxLeaf
].
amountOfWork
;
++
idxLeaf
;
}
while
(
octreeIterator
.
moveRight
());
generateIntervalFromWorkload
(
&
workloadL2P
,
totalWork
,
workloadBuffer
.
get
(),
idxLeaf
);
}
{
// Do it for the M2L
/// FLOG(FLog::Controller << "[Balance] M2L:\n");
workloadM2L
.
resize
(
OctreeHeight
);
typename
OctreeClass
::
Iterator
avoidGotoLeftIterator
(
tree
);
avoidGotoLeftIterator
.
gotoBottomLeft
();
const
CellClass
*
neighbors
[
343
];
for
(
int
idxLevel
=
OctreeHeight
-
1
;
idxLevel
>=
2
;
--
idxLevel
){
FLOG
(
FLog
::
Controller
<<
"[Balance]
\t
level "
<<
idxLevel
<<
":
\n
"
);
typename
OctreeClass
::
Iterator
octreeIterator
(
avoidGotoLeftIterator
);
avoidGotoLeftIterator
.
moveUp
();
FSize
idxCell
=
0
;
FSize
totalWork
=
0
;
do
{
// Keep track of tree iterator
workloadBuffer
[
idxCell
].
iterator
=
octreeIterator
;
// Count the nb of M2L for this cell
workloadBuffer
[
idxCell
].
amountOfWork
=
tree
->
getInteractionNeighbors
(
neighbors
,
octreeIterator
.
getCurrentGlobalCoordinate
(),
idxLevel
,
1
);
// Keep the total amount of work
totalWork
+=
workloadBuffer
[
idxCell
].
amountOfWork
;
++
idxCell
;
}
while
(
octreeIterator
.
moveRight
());
// Now split between thread
generateIntervalFromWorkload
(
&
workloadM2L
[
idxLevel
],
totalWork
,
workloadBuffer
.
get
(),
idxCell
);
}
}
{
// Do it for the M2M L2L
/// FLOG(FLog::Controller << "[Balance] M2M L2L:\n");
workloadM2M
.
resize
(
OctreeHeight
);
workloadL2L
.
resize
(
OctreeHeight
);
typename
OctreeClass
::
Iterator
avoidGotoLeftIterator
(
tree
);
avoidGotoLeftIterator
.
gotoBottomLeft
();
avoidGotoLeftIterator
.
moveUp
();
for
(
int
idxLevel
=
OctreeHeight
-
2
;
idxLevel
>=
2
;
--
idxLevel
){
FLOG
(
FLog
::
Controller
<<
"[Balance]
\t
level "
<<
idxLevel
<<
":
\n
"
);
typename
OctreeClass
::
Iterator
octreeIterator
(
avoidGotoLeftIterator
);
avoidGotoLeftIterator
.
moveUp
();
FSize
idxCell
=
0
;
FSize
totalWork
=
0
;
do
{
// Keep track of tree iterator
workloadBuffer
[
idxCell
].
iterator
=
octreeIterator
;
// Count the nb of children of the current cell
workloadBuffer
[
idxCell
].
amountOfWork
=
0
;
CellClass
**
child
=
octreeIterator
.
getCurrentChild
();
for
(
int
idxChild
=
0
;
idxChild
<
8
;
++
idxChild
){
if
(
child
[
idxChild
])
workloadBuffer
[
idxCell
].
amountOfWork
+=
1
;
}
// Keep the total amount of work
totalWork
+=
workloadBuffer
[
idxCell
].
amountOfWork
;
++
idxCell
;
}
while
(
octreeIterator
.
moveRight
());
// Now split between thread
generateIntervalFromWorkload
(
&
workloadM2M
[
idxLevel
],
totalWork
,
workloadBuffer
.
get
(),
idxCell
);
generateIntervalFromWorkload
(
&
workloadL2L
[
idxLevel
],
totalWork
,
workloadBuffer
.
get
(),
idxCell
);
}
}
{
// Prepare the P2P
const
int
LeafIndex
=
OctreeHeight
-
1
;
leafsDataArray
.
reset
(
new
LeafData
[
leafsNumber
]);
// We need the offset for each color
int
startPosAtShape
[
SizeShape
]
=
{
0
};
for
(
int
idxShape
=
1
;
idxShape
<
SizeShape
;
++
idxShape
){
startPosAtShape
[
idxShape
]
=
startPosAtShape
[
idxShape
-
1
]
+
shapeLeaves
[
idxShape
-
1
];
}
// Prepare each color
typename
OctreeClass
::
Iterator
octreeIterator
(
tree
);
octreeIterator
.
gotoBottomLeft
();
FSize
workPerShape
[
SizeShape
]
=
{
0
};
// for each leafs
for
(
int
idxLeaf
=
0
;
idxLeaf
<
leafsNumber
;
++
idxLeaf
){
const
FTreeCoordinate
&
coord
=
octreeIterator
.
getCurrentGlobalCoordinate
();
const
int
shapePosition
=
(
coord
.
getX
()
%
3
)
*
9
+
(
coord
.
getY
()
%
3
)
*
3
+
(
coord
.
getZ
()
%
3
);
const
int
positionToWork
=
startPosAtShape
[
shapePosition
]
++
;
leafsDataArray
[
positionToWork
].
index
=
octreeIterator
.
getCurrentGlobalIndex
();
leafsDataArray
[
positionToWork
].
coord
=
coord
;
leafsDataArray
[
positionToWork
].
targets
=
octreeIterator
.
getCurrentListTargets
();
leafsDataArray
[
positionToWork
].
sources
=
octreeIterator
.
getCurrentListSrc
();
// For now the cost is simply based on the number of particles
const
FSize
nbPartInLeaf
=
octreeIterator
.
getCurrentListTargets
()
->
getNbParticles
();
workloadBuffer
[
positionToWork
].
amountOfWork
=
nbPartInLeaf
*
nbPartInLeaf
;
ContainerClass
*
neighbors
[
27
];
tree
->
getLeafsNeighbors
(
neighbors
,
octreeIterator
.
getCurrentGlobalCoordinate
(),
LeafIndex
);
for
(
int
idxNeigh
=
0
;
idxNeigh
<
27
;
++
idxNeigh
){
if
(
neighbors
[
idxNeigh
]){
workloadBuffer
[
positionToWork
].
amountOfWork
+=
nbPartInLeaf
*
neighbors
[
idxNeigh
]
->
getNbParticles
();
}
}
workPerShape
[
shapePosition
]
+=
workloadBuffer
[
positionToWork
].
amountOfWork
;
octreeIterator
.
moveRight
();
}
workloadP2P
.
resize
(
SizeShape
);
int
offsetShape
=
0
;
for
(
int
idxShape
=
0
;
idxShape
<
SizeShape
;
++
idxShape
){
std
::
vector
<
std
::
pair
<
int
,
int
>>*
intervals
=
&
workloadP2P
[
idxShape
];
const
int
nbElements
=
shapeLeaves
[
idxShape
];
const
FSize
totalWork
=
workPerShape
[
idxShape
];
// Now split between thread
(
*
intervals
).
resize
(
MaxThreads
);
// Ideally each thread will have this
const
FSize
idealWork
=
(
totalWork
/
MaxThreads
);
// Assign default value for first thread
int
idxThread
=
0
;
(
*
intervals
)[
idxThread
].
first
=
offsetShape
;
FSize
assignWork
=
workloadBuffer
[
0
].
amountOfWork
;
for
(
int
idxElement
=
1
+
offsetShape
;
idxElement
<
nbElements
+
offsetShape
;
++
idxElement
){
if
(
FMath
::
Abs
((
idxThread
+
1
)
*
idealWork
-
assignWork
)
<
FMath
::
Abs
((
idxThread
+
1
)
*
idealWork
-
assignWork
-
workloadBuffer
[
idxElement
].
amountOfWork
)
&&
idxThread
!=
MaxThreads
-
1
){
(
*
intervals
)[
idxThread
].
second
=
idxElement
;
/// FLOG(FLog::Controller << "[Balance] Shape " << idxShape << " Thread " << idxThread << " goes from "
/// << (*intervals)[idxThread].first << " to " << (*intervals)[idxThread].second << "\n");
idxThread
+=
1
;
(
*
intervals
)[
idxThread
].
first
=
idxElement
;
}
assignWork
+=
workloadBuffer
[
idxElement
].
amountOfWork
;
}
(
*
intervals
)[
idxThread
].
second
=
nbElements
+
offsetShape
;
/// FLOG(FLog::Controller << "[Balance] Shape " << idxShape << " Thread " << idxThread << " goes from "
/// << (*intervals)[idxThread].first << " to " << (*intervals)[idxThread].second << "\n");
offsetShape
+=
nbElements
;
}
}
}
    /////////////////////////////////////////////////////////////////////////////
    // P2M
    /////////////////////////////////////////////////////////////////////////////

    /** Runs the P2M kernel: each thread processes its precomputed interval of leaves. */
    void bottomPass(){
        FLOG( FLog::Controller.write("\tStart Bottom Pass\n").write(FLog::Flush) );
        FLOG(FTic counterTime);
        FLOG(FTic computationCounter);

        #pragma omp parallel
        {
            // Each thread reads only its own precomputed interval (workloadP2M[thread])
            KernelClass* const myThreadkernels = kernels[omp_get_thread_num()];
            const int nbCellsToCompute = workloadP2M[omp_get_thread_num()].nbElements;
            typename OctreeClass::Iterator octreeIterator(workloadP2M[omp_get_thread_num()].iterator);

            for(int idxLeafs = 0 ; idxLeafs < nbCellsToCompute ; ++idxLeafs){
                // We need the current cell that represent the leaf
                // and the list of particles
                myThreadkernels->P2M( octreeIterator.getCurrentCell() , octreeIterator.getCurrentListSrc());
                octreeIterator.moveRight();
            }
        }

        FLOG( computationCounter.tac() );
        FLOG( FLog::Controller << "\tFinished (@Bottom Pass (P2M) = "  << counterTime.tacAndElapsed() << "s)\n" );
        FLOG( FLog::Controller << "\t\t Computation : " << computationCounter.elapsed() << " s\n" );
    }
    /////////////////////////////////////////////////////////////////////////////
    // Upward
    /////////////////////////////////////////////////////////////////////////////

    /** Runs the M2M kernel, level by level from just above the leaves up to the
     *  upper working level; within a level each thread processes its interval. */
    void upwardPass(){
        FLOG( FLog::Controller.write("\tStart Upward Pass\n").write(FLog::Flush); );
        FLOG(FTic counterTime);
        FLOG(FTic computationCounter);

        // for each levels
        for(int idxLevel = FMath::Min(OctreeHeight - 2, FAbstractAlgorithm::lowerWorkingLevel - 1) ; idxLevel >= FAbstractAlgorithm::upperWorkingLevel ; --idxLevel){
            FLOG(FTic counterTimeLevel);
            FLOG(computationCounter.tic());
            #pragma omp parallel
            {
                // Per-thread interval precomputed for this level
                KernelClass* const myThreadkernels = kernels[omp_get_thread_num()];
                const int nbCellsToCompute = workloadM2M[idxLevel][omp_get_thread_num()].nbElements;
                typename OctreeClass::Iterator octreeIterator(workloadM2M[idxLevel][omp_get_thread_num()].iterator);

                for(int idxCell = 0 ; idxCell < nbCellsToCompute ; ++idxCell){
                    // We need the current cell and the child
                    // child is an array (of 8 child) that may be null
                    myThreadkernels->M2M( octreeIterator.getCurrentCell() , octreeIterator.getCurrentChild(), idxLevel);
                    octreeIterator.moveRight();
                }
            }

            FLOG(computationCounter.tac());
            FLOG( FLog::Controller << "\t\t>> Level " << idxLevel << " = "  << counterTimeLevel.tacAndElapsed() << "s\n" );
        }

        FLOG( FLog::Controller << "\tFinished (@Upward Pass (M2M) = "  << counterTime.tacAndElapsed() << "s)\n" );
        FLOG( FLog::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" );
    }
    /////////////////////////////////////////////////////////////////////////////
    // Transfer
    /////////////////////////////////////////////////////////////////////////////

    /** Runs the M2L kernel, level by level; within a level each thread
     *  processes its precomputed interval of cells. */
    void transferPass(){
        FLOG( FLog::Controller.write("\tStart Downward Pass (M2L)\n").write(FLog::Flush); );
        FLOG(FTic counterTime);
        FLOG(FTic computationCounter);

        // for each levels
        for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel){
            // Use the user-provided separation criteria only at the lowest working level
            const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria);
            FLOG(FTic counterTimeLevel);
            FLOG(computationCounter.tic());
            #pragma omp parallel
            {
                KernelClass* const myThreadkernels = kernels[omp_get_thread_num()];
                const int nbCellsToCompute = workloadM2L[idxLevel][omp_get_thread_num()].nbElements;
                typename OctreeClass::Iterator octreeIterator(workloadM2L[idxLevel][omp_get_thread_num()].iterator);
                const CellClass* neighbors[343];

                for(int idxCell = 0 ; idxCell < nbCellsToCompute ; ++idxCell){
                    const int counter = tree->getInteractionNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(), idxLevel, separationCriteria);
                    // Skip cells without any interaction neighbor
                    if(counter) myThreadkernels->M2L( octreeIterator.getCurrentCell() , neighbors, counter, idxLevel);
                    octreeIterator.moveRight();
                }

                // Notify the kernel that the level is complete (each thread notifies its own copy)
                myThreadkernels->finishedLevelM2L(idxLevel);
            }
            FLOG(computationCounter.tac());
            FLOG( FLog::Controller << "\t\t>> Level " << idxLevel << " = "  << counterTimeLevel.tacAndElapsed() << "s\n" );
        }

        FLOG( FLog::Controller << "\tFinished (@Downward Pass (M2L) = "  << counterTime.tacAndElapsed() << "s)\n" );
        FLOG( FLog::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" );
    }
    /////////////////////////////////////////////////////////////////////////////
    // Downward
    /////////////////////////////////////////////////////////////////////////////

    /** Runs the L2L kernel, level by level down to (excluding) the leaf level;
     *  within a level each thread processes its precomputed interval. */
    void downardPass(){
        FLOG( FLog::Controller.write("\tStart Downward Pass (L2L)\n").write(FLog::Flush); );
        FLOG(FTic counterTime);
        FLOG(FTic computationCounter);

        const int heightMinusOne = FAbstractAlgorithm::lowerWorkingLevel - 1;
        // for each levels excepted leaf level
        for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < heightMinusOne ; ++idxLevel){
            FLOG(FTic counterTimeLevel);
            FLOG(computationCounter.tic());
            #pragma omp parallel
            {
                KernelClass* const myThreadkernels = kernels[omp_get_thread_num()];
                const int nbCellsToCompute = workloadL2L[idxLevel][omp_get_thread_num()].nbElements;
                typename OctreeClass::Iterator octreeIterator(workloadL2L[idxLevel][omp_get_thread_num()].iterator);

                for(int idxCell = 0 ; idxCell < nbCellsToCompute ; ++idxCell){
                    // Propagate to the children (array of 8, entries may be null)
                    myThreadkernels->L2L( octreeIterator.getCurrentCell() , octreeIterator.getCurrentChild(), idxLevel);
                    octreeIterator.moveRight();
                }
            }
            FLOG(computationCounter.tac());
            FLOG( FLog::Controller << "\t\t>> Level " << idxLevel << " = "  << counterTimeLevel.tacAndElapsed() << "s\n" );
        }

        FLOG( FLog::Controller << "\tFinished (@Downward Pass (L2L) = "  << counterTime.tacAndElapsed() << "s)\n" );
        FLOG( FLog::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" );
    }
    /////////////////////////////////////////////////////////////////////////////
    // Direct
    /////////////////////////////////////////////////////////////////////////////

    /** Runs the L2P kernel: each thread processes its precomputed interval of leaves. */
    void L2P(){
        #pragma omp parallel
        {
            KernelClass* const myThreadkernels = kernels[omp_get_thread_num()];
            const int nbCellsToCompute = workloadL2P[omp_get_thread_num()].nbElements;
            typename OctreeClass::Iterator octreeIterator(workloadL2P[omp_get_thread_num()].iterator);

            for(int idxLeafs = 0 ; idxLeafs < nbCellsToCompute ; ++idxLeafs){
                // We need the current cell that represent the leaf
                // and the list of particles
                myThreadkernels->L2P( octreeIterator.getCurrentCell() , octreeIterator.getCurrentListTargets());
                octreeIterator.moveRight();
            }
        }
    }
    /** Runs the P2P kernel.
     *
     * Leaves are processed shape (color) by shape: within one shape each
     * thread works on its own [first, second) interval of leafsDataArray
     * (precomputed in buildThreadIntervals), which is why the P2P is thread
     * safe without locking.
     * NOTE(review): the previous comment documented p2pEnabled/l2pEnabled
     * parameters that this method does not have; L2P/P2P selection is done by
     * executeCore.
     */
    void directPass(){
        FLOG( FLog::Controller.write("\tStart Direct Pass\n").write(FLog::Flush); );
        FLOG(FTic counterTime);
        FLOG(FTic computationCounter);
        FLOG(FTic computationCounterP2P);

        const int LeafIndex = OctreeHeight - 1;

        #pragma omp parallel
        {
            // Timing is sampled on thread 0 only
            FLOG(if(!omp_get_thread_num()) computationCounter.tic());
            KernelClass& myThreadkernels = (*kernels[omp_get_thread_num()]);
            // There is a maximum of 26 neighbors
            ContainerClass* neighbors[27];

            for(int idxShape = 0 ; idxShape < SizeShape ; ++idxShape){
                // This thread's interval of leaves for the current shape
                const std::pair<int,int> interval = workloadP2P[idxShape][omp_get_thread_num()];
                for(int idxLeafs = interval.first ; idxLeafs < interval.second ; ++idxLeafs){
                    LeafData& currentIter = leafsDataArray[idxLeafs];
                    // need the current particles and neighbors particles
                    FLOG(if(!omp_get_thread_num()) computationCounterP2P.tic());
                    const int counter = tree->getLeafsNeighbors(neighbors, currentIter.coord, LeafIndex);
                    myThreadkernels.P2P(currentIter.coord, currentIter.targets,
                                        currentIter.sources, neighbors, counter);
                    FLOG(if(!omp_get_thread_num()) computationCounterP2P.tac());
                }
            }
        }

        FLOG(computationCounter.tac());
        FLOG( FLog::Controller << "\tFinished (@Direct Pass (L2P + P2P) = "  << counterTime.tacAndElapsed() << "s)\n" );
        FLOG( FLog::Controller << "\t\t Computation L2P + P2P : " << computationCounter.cumulated() << " s\n" );
        FLOG( FLog::Controller << "\t\t Computation P2P : " << computationCounterP2P.cumulated() << " s\n" );
    }
};
#endif
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment