Mentions légales du service
Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
ScalFMM
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
solverstack
ScalFMM
Commits
24ced909
Commit
24ced909
authored
10 years ago
by
BRAMAS Berenger
Browse files
Options
Downloads
Patches
Plain Diff
Build intervals in task
parent
e47c9ae9
Branches
Branches containing commit
Tags
Tags containing commit
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
Src/Core/FFmmAlgorithmThreadBalance.hpp
+206
-167
206 additions, 167 deletions
Src/Core/FFmmAlgorithmThreadBalance.hpp
with
206 additions
and
167 deletions
Src/Core/FFmmAlgorithmThreadBalance.hpp
+
206
−
167
View file @
24ced909
...
...
@@ -203,191 +203,230 @@ protected:
}
// Allocate the working buffer
std
::
unique_ptr
<
WorkloadTemp
[]
>
workloadBuffer
(
new
WorkloadTemp
[
leafsNumber
]);
std
::
unique_ptr
<
WorkloadTemp
*
[]
>
workloadBufferThread
(
new
WorkloadTemp
*
[
MaxThreads
]);
memset
(
workloadBufferThread
.
get
(),
0
,
MaxThreads
*
sizeof
(
WorkloadTemp
*
));
{
// Prepare P2M
/// FLOG(FLog::Controller << "[Balance] P2M:\n");
typename
OctreeClass
::
Iterator
octreeIterator
(
tree
);
octreeIterator
.
gotoBottomLeft
();
FSize
idxLeaf
=
0
;
FSize
totalWork
=
0
;
do
{
// Keep track of tree iterator
workloadBuffer
[
idxLeaf
].
iterator
=
octreeIterator
;
// Count the nb of particles as amount of work in the leaf
workloadBuffer
[
idxLeaf
].
amountOfWork
=
octreeIterator
.
getCurrentListSrc
()
->
getNbParticles
();
// Keep the total amount of work
totalWork
+=
workloadBuffer
[
idxLeaf
].
amountOfWork
;
++
idxLeaf
;
}
while
(
octreeIterator
.
moveRight
());
generateIntervalFromWorkload
(
&
workloadP2M
,
totalWork
,
workloadBuffer
.
get
(),
idxLeaf
);
}
#pragma omp parallel
{
#pragma omp single
{
#pragma omp task
{
// Prepare P2M
if
(
workloadBufferThread
[
omp_get_thread_num
()]
==
nullptr
){
workloadBufferThread
[
omp_get_thread_num
()]
=
new
WorkloadTemp
[
leafsNumber
];
}
WorkloadTemp
*
workloadBuffer
=
workloadBufferThread
[
omp_get_thread_num
()];
/// FLOG(FLog::Controller << "[Balance] P2M:\n");
typename
OctreeClass
::
Iterator
octreeIterator
(
tree
);
octreeIterator
.
gotoBottomLeft
();
FSize
idxLeaf
=
0
;
FSize
totalWork
=
0
;
do
{
// Keep track of tree iterator
workloadBuffer
[
idxLeaf
].
iterator
=
octreeIterator
;
// Count the nb of particles as amount of work in the leaf
workloadBuffer
[
idxLeaf
].
amountOfWork
=
octreeIterator
.
getCurrentListSrc
()
->
getNbParticles
();
// Keep the total amount of work
totalWork
+=
workloadBuffer
[
idxLeaf
].
amountOfWork
;
++
idxLeaf
;
}
while
(
octreeIterator
.
moveRight
());
generateIntervalFromWorkload
(
&
workloadP2M
,
totalWork
,
workloadBuffer
,
idxLeaf
);
}
{
// Prepare L2P
/// FLOG(FLog::Controller << "[Balance] L2P:\n");
typename
OctreeClass
::
Iterator
octreeIterator
(
tree
);
octreeIterator
.
gotoBottomLeft
();
FSize
idxLeaf
=
0
;
FSize
totalWork
=
0
;
do
{
// Keep track of tree iterator
workloadBuffer
[
idxLeaf
].
iterator
=
octreeIterator
;
// Count the nb of particles as amount of work in the leaf
workloadBuffer
[
idxLeaf
].
amountOfWork
=
octreeIterator
.
getCurrentListTargets
()
->
getNbParticles
();
// Keep the total amount of work
totalWork
+=
workloadBuffer
[
idxLeaf
].
amountOfWork
;
++
idxLeaf
;
}
while
(
octreeIterator
.
moveRight
());
#pragma omp task
{
// Prepare L2P
if
(
workloadBufferThread
[
omp_get_thread_num
()]
==
nullptr
){
workloadBufferThread
[
omp_get_thread_num
()]
=
new
WorkloadTemp
[
leafsNumber
];
}
WorkloadTemp
*
workloadBuffer
=
workloadBufferThread
[
omp_get_thread_num
()];
/// FLOG(FLog::Controller << "[Balance] L2P:\n");
typename
OctreeClass
::
Iterator
octreeIterator
(
tree
);
octreeIterator
.
gotoBottomLeft
();
FSize
idxLeaf
=
0
;
FSize
totalWork
=
0
;
do
{
// Keep track of tree iterator
workloadBuffer
[
idxLeaf
].
iterator
=
octreeIterator
;
// Count the nb of particles as amount of work in the leaf
workloadBuffer
[
idxLeaf
].
amountOfWork
=
octreeIterator
.
getCurrentListTargets
()
->
getNbParticles
();
// Keep the total amount of work
totalWork
+=
workloadBuffer
[
idxLeaf
].
amountOfWork
;
++
idxLeaf
;
}
while
(
octreeIterator
.
moveRight
());
generateIntervalFromWorkload
(
&
workloadL2P
,
totalWork
,
workloadBuffer
,
idxLeaf
);
}
generateIntervalFromWorkload
(
&
workloadL2P
,
totalWork
,
workloadBuffer
.
get
(),
idxLeaf
);
}
#pragma omp task
{
// Do it for the M2L
if
(
workloadBufferThread
[
omp_get_thread_num
()]
==
nullptr
){
workloadBufferThread
[
omp_get_thread_num
()]
=
new
WorkloadTemp
[
leafsNumber
];
}
WorkloadTemp
*
workloadBuffer
=
workloadBufferThread
[
omp_get_thread_num
()];
/// FLOG(FLog::Controller << "[Balance] M2L:\n");
workloadM2L
.
resize
(
OctreeHeight
);
typename
OctreeClass
::
Iterator
avoidGotoLeftIterator
(
tree
);
avoidGotoLeftIterator
.
gotoBottomLeft
();
const
CellClass
*
neighbors
[
343
];
for
(
int
idxLevel
=
OctreeHeight
-
1
;
idxLevel
>=
2
;
--
idxLevel
){
/// FLOG(FLog::Controller << "[Balance] \t level " << idxLevel << ":\n");
typename
OctreeClass
::
Iterator
octreeIterator
(
avoidGotoLeftIterator
);
avoidGotoLeftIterator
.
moveUp
();
FSize
idxCell
=
0
;
FSize
totalWork
=
0
;
do
{
// Keep track of tree iterator
workloadBuffer
[
idxCell
].
iterator
=
octreeIterator
;
// Count the nb of M2L for this cell
workloadBuffer
[
idxCell
].
amountOfWork
=
tree
->
getInteractionNeighbors
(
neighbors
,
octreeIterator
.
getCurrentGlobalCoordinate
(),
idxLevel
,
1
);
// Keep the total amount of work
totalWork
+=
workloadBuffer
[
idxCell
].
amountOfWork
;
++
idxCell
;
}
while
(
octreeIterator
.
moveRight
());
// Now split between thread
generateIntervalFromWorkload
(
&
workloadM2L
[
idxLevel
],
totalWork
,
workloadBuffer
,
idxCell
);
}
}
#pragma omp task
{
// Do it for the M2M L2L
if
(
workloadBufferThread
[
omp_get_thread_num
()]
==
nullptr
){
workloadBufferThread
[
omp_get_thread_num
()]
=
new
WorkloadTemp
[
leafsNumber
];
}
WorkloadTemp
*
workloadBuffer
=
workloadBufferThread
[
omp_get_thread_num
()];
/// FLOG(FLog::Controller << "[Balance] M2M L2L:\n");
workloadM2M
.
resize
(
OctreeHeight
);
workloadL2L
.
resize
(
OctreeHeight
);
typename
OctreeClass
::
Iterator
avoidGotoLeftIterator
(
tree
);
avoidGotoLeftIterator
.
gotoBottomLeft
();
avoidGotoLeftIterator
.
moveUp
();
for
(
int
idxLevel
=
OctreeHeight
-
2
;
idxLevel
>=
2
;
--
idxLevel
){
/// FLOG(FLog::Controller << "[Balance] \t level " << idxLevel << ":\n");
typename
OctreeClass
::
Iterator
octreeIterator
(
avoidGotoLeftIterator
);
avoidGotoLeftIterator
.
moveUp
();
FSize
idxCell
=
0
;
FSize
totalWork
=
0
;
do
{
// Keep track of tree iterator
workloadBuffer
[
idxCell
].
iterator
=
octreeIterator
;
// Count the nb of children of the current cell
workloadBuffer
[
idxCell
].
amountOfWork
=
0
;
CellClass
**
child
=
octreeIterator
.
getCurrentChild
();
for
(
int
idxChild
=
0
;
idxChild
<
8
;
++
idxChild
){
if
(
child
[
idxChild
])
workloadBuffer
[
idxCell
].
amountOfWork
+=
1
;
}
// Keep the total amount of work
totalWork
+=
workloadBuffer
[
idxCell
].
amountOfWork
;
++
idxCell
;
}
while
(
octreeIterator
.
moveRight
());
// Now split between thread
generateIntervalFromWorkload
(
&
workloadM2M
[
idxLevel
],
totalWork
,
workloadBuffer
,
idxCell
);
generateIntervalFromWorkload
(
&
workloadL2L
[
idxLevel
],
totalWork
,
workloadBuffer
,
idxCell
);
}
}
{
// Do it for the M2L
/// FLOG(FLog::Controller << "[Balance] M2L:\n");
workloadM2L
.
resize
(
OctreeHeight
);
typename
OctreeClass
::
Iterator
avoidGotoLeftIterator
(
tree
);
avoidGotoLeftIterator
.
gotoBottomLeft
();
const
CellClass
*
neighbors
[
343
];
for
(
int
idxLevel
=
OctreeHeight
-
1
;
idxLevel
>=
2
;
--
idxLevel
){
FLOG
(
FLog
::
Controller
<<
"[Balance]
\t
level "
<<
idxLevel
<<
":
\n
"
);
typename
OctreeClass
::
Iterator
octreeIterator
(
avoidGotoLeftIterator
);
avoidGotoLeftIterator
.
moveUp
();
FSize
idxCell
=
0
;
FSize
totalWork
=
0
;
do
{
// Keep track of tree iterator
workloadBuffer
[
idxCell
].
iterator
=
octreeIterator
;
// Count the nb of M2L for this cell
workloadBuffer
[
idxCell
].
amountOfWork
=
tree
->
getInteractionNeighbors
(
neighbors
,
octreeIterator
.
getCurrentGlobalCoordinate
(),
idxLevel
,
1
);
// Keep the total amount of work
totalWork
+=
workloadBuffer
[
idxCell
].
amountOfWork
;
++
idxCell
;
}
while
(
octreeIterator
.
moveRight
());
// Now split between thread
generateIntervalFromWorkload
(
&
workloadM2L
[
idxLevel
],
totalWork
,
workloadBuffer
.
get
(),
idxCell
);
}
}
{
// Do it for the M2M L2L
/// FLOG(FLog::Controller << "[Balance] M2M L2L:\n");
workloadM2M
.
resize
(
OctreeHeight
);
workloadL2L
.
resize
(
OctreeHeight
);
typename
OctreeClass
::
Iterator
avoidGotoLeftIterator
(
tree
);
avoidGotoLeftIterator
.
gotoBottomLeft
();
avoidGotoLeftIterator
.
moveUp
();
for
(
int
idxLevel
=
OctreeHeight
-
2
;
idxLevel
>=
2
;
--
idxLevel
){
FLOG
(
FLog
::
Controller
<<
"[Balance]
\t
level "
<<
idxLevel
<<
":
\n
"
);
typename
OctreeClass
::
Iterator
octreeIterator
(
avoidGotoLeftIterator
);
avoidGotoLeftIterator
.
moveUp
();
FSize
idxCell
=
0
;
FSize
totalWork
=
0
;
do
{
// Keep track of tree iterator
workloadBuffer
[
idxCell
].
iterator
=
octreeIterator
;
// Count the nb of children of the current cell
workloadBuffer
[
idxCell
].
amountOfWork
=
0
;
CellClass
**
child
=
octreeIterator
.
getCurrentChild
();
for
(
int
idxChild
=
0
;
idxChild
<
8
;
++
idxChild
){
if
(
child
[
idxChild
])
workloadBuffer
[
idxCell
].
amountOfWork
+=
1
;
#pragma omp task
{
if
(
workloadBufferThread
[
omp_get_thread_num
()]
==
nullptr
){
workloadBufferThread
[
omp_get_thread_num
()]
=
new
WorkloadTemp
[
leafsNumber
];
}
WorkloadTemp
*
workloadBuffer
=
workloadBufferThread
[
omp_get_thread_num
()];
// Prepare the P2P
const
int
LeafIndex
=
OctreeHeight
-
1
;
leafsDataArray
.
reset
(
new
LeafData
[
leafsNumber
]);
// We need the offset for each color
int
startPosAtShape
[
SizeShape
]
=
{
0
};
for
(
int
idxShape
=
1
;
idxShape
<
SizeShape
;
++
idxShape
){
startPosAtShape
[
idxShape
]
=
startPosAtShape
[
idxShape
-
1
]
+
shapeLeaves
[
idxShape
-
1
];
}
// Keep the total amount of work
totalWork
+=
workloadBuffer
[
idxCell
].
amountOfWork
;
++
idxCell
;
}
while
(
octreeIterator
.
moveRight
());
// Now split between thread
generateIntervalFromWorkload
(
&
workloadM2M
[
idxLevel
],
totalWork
,
workloadBuffer
.
get
(),
idxCell
);
generateIntervalFromWorkload
(
&
workloadL2L
[
idxLevel
],
totalWork
,
workloadBuffer
.
get
(),
idxCell
);
}
}
{
// Prepare the P2P
const
int
LeafIndex
=
OctreeHeight
-
1
;
leafsDataArray
.
reset
(
new
LeafData
[
leafsNumber
]);
// We need the offset for each color
int
startPosAtShape
[
SizeShape
]
=
{
0
};
for
(
int
idxShape
=
1
;
idxShape
<
SizeShape
;
++
idxShape
){
startPosAtShape
[
idxShape
]
=
startPosAtShape
[
idxShape
-
1
]
+
shapeLeaves
[
idxShape
-
1
];
}
// Prepare each color
typename
OctreeClass
::
Iterator
octreeIterator
(
tree
);
octreeIterator
.
gotoBottomLeft
();
// Prepare each color
typename
OctreeClass
::
Iterator
octreeIterator
(
tree
);
octreeIterator
.
gotoBottomLeft
();
FSize
workPerShape
[
SizeShape
]
=
{
0
};
FSize
workPerShape
[
SizeShape
]
=
{
0
};
// for each leafs
for
(
int
idxLeaf
=
0
;
idxLeaf
<
leafsNumber
;
++
idxLeaf
){
const
FTreeCoordinate
&
coord
=
octreeIterator
.
getCurrentGlobalCoordinate
();
const
int
shapePosition
=
(
coord
.
getX
()
%
3
)
*
9
+
(
coord
.
getY
()
%
3
)
*
3
+
(
coord
.
getZ
()
%
3
);
const
int
positionToWork
=
startPosAtShape
[
shapePosition
]
++
;
leafsDataArray
[
positionToWork
].
index
=
octreeIterator
.
getCurrentGlobalIndex
();
leafsDataArray
[
positionToWork
].
coord
=
coord
;
leafsDataArray
[
positionToWork
].
targets
=
octreeIterator
.
getCurrentListTargets
();
leafsDataArray
[
positionToWork
].
sources
=
octreeIterator
.
getCurrentListSrc
();
// For now the cost is simply based on the number of particles
const
FSize
nbPartInLeaf
=
octreeIterator
.
getCurrentListTargets
()
->
getNbParticles
();
workloadBuffer
[
positionToWork
].
amountOfWork
=
nbPartInLeaf
*
nbPartInLeaf
;
ContainerClass
*
neighbors
[
27
];
tree
->
getLeafsNeighbors
(
neighbors
,
octreeIterator
.
getCurrentGlobalCoordinate
(),
LeafIndex
);
for
(
int
idxNeigh
=
0
;
idxNeigh
<
27
;
++
idxNeigh
){
if
(
neighbors
[
idxNeigh
]){
workloadBuffer
[
positionToWork
].
amountOfWork
+=
nbPartInLeaf
*
neighbors
[
idxNeigh
]
->
getNbParticles
();
}
}
// for each leafs
for
(
int
idxLeaf
=
0
;
idxLeaf
<
leafsNumber
;
++
idxLeaf
){
const
FTreeCoordinate
&
coord
=
octreeIterator
.
getCurrentGlobalCoordinate
();
const
int
shapePosition
=
(
coord
.
getX
()
%
3
)
*
9
+
(
coord
.
getY
()
%
3
)
*
3
+
(
coord
.
getZ
()
%
3
);
workPerShape
[
shapePosition
]
+=
workloadBuffer
[
positionToWork
].
amountOfWork
;
const
int
positionToWork
=
startPosAtShape
[
shapePosition
]
++
;
octreeIterator
.
moveRight
();
}
leafsDataArray
[
positionToWork
].
index
=
octreeIterator
.
getCurrentGlobalIndex
();
leafsDataArray
[
positionToWork
].
coord
=
coord
;
leafsDataArray
[
positionToWork
].
targets
=
octreeIterator
.
getCurrentListTargets
();
leafsDataArray
[
positionToWork
].
sources
=
octreeIterator
.
getCurrentListSrc
();
workloadP2P
.
resize
(
SizeShape
);
int
offsetShape
=
0
;
// For now the cost is simply based on the number of particles
const
FSize
nbPartInLeaf
=
octreeIterator
.
getCurrentListTargets
()
->
getNbParticles
();
workloadBuffer
[
positionToWork
].
amountOfWork
=
nbPartInLeaf
*
nbPartInLeaf
;
ContainerClass
*
neighbors
[
27
];
tree
->
getLeafsNeighbors
(
neighbors
,
octreeIterator
.
getCurrentGlobalCoordinate
(),
LeafIndex
);
for
(
int
idxNeigh
=
0
;
idxNeigh
<
27
;
++
idxNeigh
){
if
(
neighbors
[
idxNeigh
]){
workloadBuffer
[
positionToWork
].
amountOfWork
+=
nbPartInLeaf
*
neighbors
[
idxNeigh
]
->
getNbParticles
();
}
}
workPerShape
[
shapePosition
]
+=
workloadBuffer
[
positionToWork
].
amountOfWork
;
octreeIterator
.
moveRight
();
}
workloadP2P
.
resize
(
SizeShape
);
int
offsetShape
=
0
;
for
(
int
idxShape
=
0
;
idxShape
<
SizeShape
;
++
idxShape
){
std
::
vector
<
std
::
pair
<
int
,
int
>>*
intervals
=
&
workloadP2P
[
idxShape
];
const
int
nbElements
=
shapeLeaves
[
idxShape
];
const
FSize
totalWork
=
workPerShape
[
idxShape
];
// Now split between thread
(
*
intervals
).
resize
(
MaxThreads
);
// Ideally each thread will have this
const
FSize
idealWork
=
(
totalWork
/
MaxThreads
);
// Assign default value for first thread
int
idxThread
=
0
;
(
*
intervals
)[
idxThread
].
first
=
offsetShape
;
FSize
assignWork
=
workloadBuffer
[
0
].
amountOfWork
;
for
(
int
idxElement
=
1
+
offsetShape
;
idxElement
<
nbElements
+
offsetShape
;
++
idxElement
){
if
(
FMath
::
Abs
((
idxThread
+
1
)
*
idealWork
-
assignWork
)
<
FMath
::
Abs
((
idxThread
+
1
)
*
idealWork
-
assignWork
-
workloadBuffer
[
idxElement
].
amountOfWork
)
&&
idxThread
!=
MaxThreads
-
1
){
(
*
intervals
)[
idxThread
].
second
=
idxElement
;
/// FLOG(FLog::Controller << "[Balance] Shape " << idxShape << " Thread " << idxThread << " goes from "
/// << (*intervals)[idxThread].first << " to " << (*intervals)[idxThread].second << "\n");
idxThread
+=
1
;
(
*
intervals
)[
idxThread
].
first
=
idxElement
;
}
assignWork
+=
workloadBuffer
[
idxElement
].
amountOfWork
;
}
(
*
intervals
)[
idxThread
].
second
=
nbElements
+
offsetShape
;
for
(
int
idxShape
=
0
;
idxShape
<
SizeShape
;
++
idxShape
){
std
::
vector
<
std
::
pair
<
int
,
int
>>*
intervals
=
&
workloadP2P
[
idxShape
];
const
int
nbElements
=
shapeLeaves
[
idxShape
];
const
FSize
totalWork
=
workPerShape
[
idxShape
];
// Now split between thread
(
*
intervals
).
resize
(
MaxThreads
);
// Ideally each thread will have this
const
FSize
idealWork
=
(
totalWork
/
MaxThreads
);
// Assign default value for first thread
int
idxThread
=
0
;
(
*
intervals
)[
idxThread
].
first
=
offsetShape
;
FSize
assignWork
=
workloadBuffer
[
0
].
amountOfWork
;
for
(
int
idxElement
=
1
+
offsetShape
;
idxElement
<
nbElements
+
offsetShape
;
++
idxElement
){
if
(
FMath
::
Abs
((
idxThread
+
1
)
*
idealWork
-
assignWork
)
<
FMath
::
Abs
((
idxThread
+
1
)
*
idealWork
-
assignWork
-
workloadBuffer
[
idxElement
].
amountOfWork
)
&&
idxThread
!=
MaxThreads
-
1
){
(
*
intervals
)[
idxThread
].
second
=
idxElement
;
/// FLOG(FLog::Controller << "[Balance] Shape " << idxShape << " Thread " << idxThread << " goes from "
/// << (*intervals)[idxThread].first << " to " << (*intervals)[idxThread].second << "\n");
idxThread
+=
1
;
(
*
intervals
)[
idxThread
].
first
=
idx
Element
;
offsetShape
+=
nb
Element
s
;
}
assignWork
+=
workloadBuffer
[
idxElement
].
amountOfWork
;
}
(
*
intervals
)[
idxThread
].
second
=
nbElements
+
offsetShape
;
}
/// FLOG(FLog::Controller << "[Balance] Shape " << idxShape << " Thread " << idxThread << " goes from "
/// << (*intervals)[idxThread].first << " to " << (*intervals)[idxThread].second << "\n");
#pragma omp barrier
}
offsetShape
+=
nbElements
;
}
for
(
int
idxThread
=
0
;
idxThread
<
MaxThreads
;
++
idxThread
){
delete
[]
workloadBufferThread
[
idxThread
];
}
}
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment