Mentions légales du service
Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
ScalFMM
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
solverstack
ScalFMM
Commits
24ced909
Commit
24ced909
authored
10 years ago
by
BRAMAS Berenger
Browse files
Options
Downloads
Patches
Plain Diff
Build intervals in task
parent
e47c9ae9
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
Src/Core/FFmmAlgorithmThreadBalance.hpp
+206
-167
206 additions, 167 deletions
Src/Core/FFmmAlgorithmThreadBalance.hpp
with
206 additions
and
167 deletions
Src/Core/FFmmAlgorithmThreadBalance.hpp
+
206
−
167
View file @
24ced909
...
@@ -203,191 +203,230 @@ protected:
...
@@ -203,191 +203,230 @@ protected:
}
}
// Allocate the working buffer
// Allocate the working buffer
std
::
unique_ptr
<
WorkloadTemp
[]
>
workloadBuffer
(
new
WorkloadTemp
[
leafsNumber
]);
std
::
unique_ptr
<
WorkloadTemp
*
[]
>
workloadBufferThread
(
new
WorkloadTemp
*
[
MaxThreads
]);
memset
(
workloadBufferThread
.
get
(),
0
,
MaxThreads
*
sizeof
(
WorkloadTemp
*
));
{
// Prepare P2M
#pragma omp parallel
/// FLOG(FLog::Controller << "[Balance] P2M:\n");
{
typename
OctreeClass
::
Iterator
octreeIterator
(
tree
);
#pragma omp single
octreeIterator
.
gotoBottomLeft
();
{
FSize
idxLeaf
=
0
;
#pragma omp task
FSize
totalWork
=
0
;
{
// Prepare P2M
do
{
if
(
workloadBufferThread
[
omp_get_thread_num
()]
==
nullptr
){
// Keep track of tree iterator
workloadBufferThread
[
omp_get_thread_num
()]
=
new
WorkloadTemp
[
leafsNumber
];
workloadBuffer
[
idxLeaf
].
iterator
=
octreeIterator
;
}
// Count the nb of particles as amount of work in the leaf
WorkloadTemp
*
workloadBuffer
=
workloadBufferThread
[
omp_get_thread_num
()];
workloadBuffer
[
idxLeaf
].
amountOfWork
=
octreeIterator
.
getCurrentListSrc
()
->
getNbParticles
();
// Keep the total amount of work
/// FLOG(FLog::Controller << "[Balance] P2M:\n");
totalWork
+=
workloadBuffer
[
idxLeaf
].
amountOfWork
;
typename
OctreeClass
::
Iterator
octreeIterator
(
tree
);
++
idxLeaf
;
octreeIterator
.
gotoBottomLeft
();
}
while
(
octreeIterator
.
moveRight
());
FSize
idxLeaf
=
0
;
FSize
totalWork
=
0
;
generateIntervalFromWorkload
(
&
workloadP2M
,
totalWork
,
workloadBuffer
.
get
(),
idxLeaf
);
do
{
}
// Keep track of tree iterator
workloadBuffer
[
idxLeaf
].
iterator
=
octreeIterator
;
// Count the nb of particles as amount of work in the leaf
workloadBuffer
[
idxLeaf
].
amountOfWork
=
octreeIterator
.
getCurrentListSrc
()
->
getNbParticles
();
// Keep the total amount of work
totalWork
+=
workloadBuffer
[
idxLeaf
].
amountOfWork
;
++
idxLeaf
;
}
while
(
octreeIterator
.
moveRight
());
generateIntervalFromWorkload
(
&
workloadP2M
,
totalWork
,
workloadBuffer
,
idxLeaf
);
}
{
// Prepare L2P
#pragma omp task
/// FLOG(FLog::Controller << "[Balance] L2P:\n");
{
// Prepare L2P
typename
OctreeClass
::
Iterator
octreeIterator
(
tree
);
if
(
workloadBufferThread
[
omp_get_thread_num
()]
==
nullptr
){
octreeIterator
.
gotoBottomLeft
();
workloadBufferThread
[
omp_get_thread_num
()]
=
new
WorkloadTemp
[
leafsNumber
];
FSize
idxLeaf
=
0
;
}
FSize
totalWork
=
0
;
WorkloadTemp
*
workloadBuffer
=
workloadBufferThread
[
omp_get_thread_num
()];
do
{
/// FLOG(FLog::Controller << "[Balance] L2P:\n");
// Keep track of tree iterator
typename
OctreeClass
::
Iterator
octreeIterator
(
tree
);
workloadBuffer
[
idxLeaf
].
iterator
=
octreeIterator
;
octreeIterator
.
gotoBottomLeft
();
// Count the nb of particles as amount of work in the leaf
FSize
idxLeaf
=
0
;
workloadBuffer
[
idxLeaf
].
amountOfWork
=
octreeIterator
.
getCurrentListTargets
()
->
getNbParticles
();
FSize
totalWork
=
0
;
// Keep the total amount of work
do
{
totalWork
+=
workloadBuffer
[
idxLeaf
].
amountOfWork
;
// Keep track of tree iterator
++
idxLeaf
;
workloadBuffer
[
idxLeaf
].
iterator
=
octreeIterator
;
}
while
(
octreeIterator
.
moveRight
());
// Count the nb of particles as amount of work in the leaf
workloadBuffer
[
idxLeaf
].
amountOfWork
=
octreeIterator
.
getCurrentListTargets
()
->
getNbParticles
();
// Keep the total amount of work
totalWork
+=
workloadBuffer
[
idxLeaf
].
amountOfWork
;
++
idxLeaf
;
}
while
(
octreeIterator
.
moveRight
());
generateIntervalFromWorkload
(
&
workloadL2P
,
totalWork
,
workloadBuffer
,
idxLeaf
);
}
generateIntervalFromWorkload
(
&
workloadL2P
,
totalWork
,
workloadBuffer
.
get
(),
idxLeaf
);
#pragma omp task
}
{
// Do it for the M2L
if
(
workloadBufferThread
[
omp_get_thread_num
()]
==
nullptr
){
workloadBufferThread
[
omp_get_thread_num
()]
=
new
WorkloadTemp
[
leafsNumber
];
}
WorkloadTemp
*
workloadBuffer
=
workloadBufferThread
[
omp_get_thread_num
()];
/// FLOG(FLog::Controller << "[Balance] M2L:\n");
workloadM2L
.
resize
(
OctreeHeight
);
typename
OctreeClass
::
Iterator
avoidGotoLeftIterator
(
tree
);
avoidGotoLeftIterator
.
gotoBottomLeft
();
const
CellClass
*
neighbors
[
343
];
for
(
int
idxLevel
=
OctreeHeight
-
1
;
idxLevel
>=
2
;
--
idxLevel
){
/// FLOG(FLog::Controller << "[Balance] \t level " << idxLevel << ":\n");
typename
OctreeClass
::
Iterator
octreeIterator
(
avoidGotoLeftIterator
);
avoidGotoLeftIterator
.
moveUp
();
FSize
idxCell
=
0
;
FSize
totalWork
=
0
;
do
{
// Keep track of tree iterator
workloadBuffer
[
idxCell
].
iterator
=
octreeIterator
;
// Count the nb of M2L for this cell
workloadBuffer
[
idxCell
].
amountOfWork
=
tree
->
getInteractionNeighbors
(
neighbors
,
octreeIterator
.
getCurrentGlobalCoordinate
(),
idxLevel
,
1
);
// Keep the total amount of work
totalWork
+=
workloadBuffer
[
idxCell
].
amountOfWork
;
++
idxCell
;
}
while
(
octreeIterator
.
moveRight
());
// Now split between thread
generateIntervalFromWorkload
(
&
workloadM2L
[
idxLevel
],
totalWork
,
workloadBuffer
,
idxCell
);
}
}
#pragma omp task
{
// Do it for the M2M L2L
if
(
workloadBufferThread
[
omp_get_thread_num
()]
==
nullptr
){
workloadBufferThread
[
omp_get_thread_num
()]
=
new
WorkloadTemp
[
leafsNumber
];
}
WorkloadTemp
*
workloadBuffer
=
workloadBufferThread
[
omp_get_thread_num
()];
/// FLOG(FLog::Controller << "[Balance] M2M L2L:\n");
workloadM2M
.
resize
(
OctreeHeight
);
workloadL2L
.
resize
(
OctreeHeight
);
typename
OctreeClass
::
Iterator
avoidGotoLeftIterator
(
tree
);
avoidGotoLeftIterator
.
gotoBottomLeft
();
avoidGotoLeftIterator
.
moveUp
();
for
(
int
idxLevel
=
OctreeHeight
-
2
;
idxLevel
>=
2
;
--
idxLevel
){
/// FLOG(FLog::Controller << "[Balance] \t level " << idxLevel << ":\n");
typename
OctreeClass
::
Iterator
octreeIterator
(
avoidGotoLeftIterator
);
avoidGotoLeftIterator
.
moveUp
();
FSize
idxCell
=
0
;
FSize
totalWork
=
0
;
do
{
// Keep track of tree iterator
workloadBuffer
[
idxCell
].
iterator
=
octreeIterator
;
// Count the nb of children of the current cell
workloadBuffer
[
idxCell
].
amountOfWork
=
0
;
CellClass
**
child
=
octreeIterator
.
getCurrentChild
();
for
(
int
idxChild
=
0
;
idxChild
<
8
;
++
idxChild
){
if
(
child
[
idxChild
])
workloadBuffer
[
idxCell
].
amountOfWork
+=
1
;
}
// Keep the total amount of work
totalWork
+=
workloadBuffer
[
idxCell
].
amountOfWork
;
++
idxCell
;
}
while
(
octreeIterator
.
moveRight
());
// Now split between thread
generateIntervalFromWorkload
(
&
workloadM2M
[
idxLevel
],
totalWork
,
workloadBuffer
,
idxCell
);
generateIntervalFromWorkload
(
&
workloadL2L
[
idxLevel
],
totalWork
,
workloadBuffer
,
idxCell
);
}
}
{
// Do it for the M2L
#pragma omp task
/// FLOG(FLog::Controller << "[Balance] M2L:\n");
{
workloadM2L
.
resize
(
OctreeHeight
);
if
(
workloadBufferThread
[
omp_get_thread_num
()]
==
nullptr
){
typename
OctreeClass
::
Iterator
avoidGotoLeftIterator
(
tree
);
workloadBufferThread
[
omp_get_thread_num
()]
=
new
WorkloadTemp
[
leafsNumber
];
avoidGotoLeftIterator
.
gotoBottomLeft
();
}
WorkloadTemp
*
workloadBuffer
=
workloadBufferThread
[
omp_get_thread_num
()];
const
CellClass
*
neighbors
[
343
];
// Prepare the P2P
const
int
LeafIndex
=
OctreeHeight
-
1
;
for
(
int
idxLevel
=
OctreeHeight
-
1
;
idxLevel
>=
2
;
--
idxLevel
){
leafsDataArray
.
reset
(
new
LeafData
[
leafsNumber
]);
FLOG
(
FLog
::
Controller
<<
"[Balance]
\t
level "
<<
idxLevel
<<
":
\n
"
);
typename
OctreeClass
::
Iterator
octreeIterator
(
avoidGotoLeftIterator
);
// We need the offset for each color
avoidGotoLeftIterator
.
moveUp
();
int
startPosAtShape
[
SizeShape
]
=
{
0
};
for
(
int
idxShape
=
1
;
idxShape
<
SizeShape
;
++
idxShape
){
FSize
idxCell
=
0
;
startPosAtShape
[
idxShape
]
=
startPosAtShape
[
idxShape
-
1
]
+
shapeLeaves
[
idxShape
-
1
];
FSize
totalWork
=
0
;
do
{
// Keep track of tree iterator
workloadBuffer
[
idxCell
].
iterator
=
octreeIterator
;
// Count the nb of M2L for this cell
workloadBuffer
[
idxCell
].
amountOfWork
=
tree
->
getInteractionNeighbors
(
neighbors
,
octreeIterator
.
getCurrentGlobalCoordinate
(),
idxLevel
,
1
);
// Keep the total amount of work
totalWork
+=
workloadBuffer
[
idxCell
].
amountOfWork
;
++
idxCell
;
}
while
(
octreeIterator
.
moveRight
());
// Now split between thread
generateIntervalFromWorkload
(
&
workloadM2L
[
idxLevel
],
totalWork
,
workloadBuffer
.
get
(),
idxCell
);
}
}
{
// Do it for the M2M L2L
/// FLOG(FLog::Controller << "[Balance] M2M L2L:\n");
workloadM2M
.
resize
(
OctreeHeight
);
workloadL2L
.
resize
(
OctreeHeight
);
typename
OctreeClass
::
Iterator
avoidGotoLeftIterator
(
tree
);
avoidGotoLeftIterator
.
gotoBottomLeft
();
avoidGotoLeftIterator
.
moveUp
();
for
(
int
idxLevel
=
OctreeHeight
-
2
;
idxLevel
>=
2
;
--
idxLevel
){
FLOG
(
FLog
::
Controller
<<
"[Balance]
\t
level "
<<
idxLevel
<<
":
\n
"
);
typename
OctreeClass
::
Iterator
octreeIterator
(
avoidGotoLeftIterator
);
avoidGotoLeftIterator
.
moveUp
();
FSize
idxCell
=
0
;
FSize
totalWork
=
0
;
do
{
// Keep track of tree iterator
workloadBuffer
[
idxCell
].
iterator
=
octreeIterator
;
// Count the nb of children of the current cell
workloadBuffer
[
idxCell
].
amountOfWork
=
0
;
CellClass
**
child
=
octreeIterator
.
getCurrentChild
();
for
(
int
idxChild
=
0
;
idxChild
<
8
;
++
idxChild
){
if
(
child
[
idxChild
])
workloadBuffer
[
idxCell
].
amountOfWork
+=
1
;
}
}
// Keep the total amount of work
totalWork
+=
workloadBuffer
[
idxCell
].
amountOfWork
;
++
idxCell
;
}
while
(
octreeIterator
.
moveRight
());
// Now split between thread
generateIntervalFromWorkload
(
&
workloadM2M
[
idxLevel
],
totalWork
,
workloadBuffer
.
get
(),
idxCell
);
generateIntervalFromWorkload
(
&
workloadL2L
[
idxLevel
],
totalWork
,
workloadBuffer
.
get
(),
idxCell
);
}
}
{
// Prepare each color
// Prepare the P2P
typename
OctreeClass
::
Iterator
octreeIterator
(
tree
);
const
int
LeafIndex
=
OctreeHeight
-
1
;
octreeIterator
.
gotoBottomLeft
();
leafsDataArray
.
reset
(
new
LeafData
[
leafsNumber
]);
// We need the offset for each color
int
startPosAtShape
[
SizeShape
]
=
{
0
};
for
(
int
idxShape
=
1
;
idxShape
<
SizeShape
;
++
idxShape
){
startPosAtShape
[
idxShape
]
=
startPosAtShape
[
idxShape
-
1
]
+
shapeLeaves
[
idxShape
-
1
];
}
// Prepare each color
FSize
workPerShape
[
SizeShape
]
=
{
0
};
typename
OctreeClass
::
Iterator
octreeIterator
(
tree
);
octreeIterator
.
gotoBottomLeft
();
FSize
workPerShape
[
SizeShape
]
=
{
0
};
// for each leafs
for
(
int
idxLeaf
=
0
;
idxLeaf
<
leafsNumber
;
++
idxLeaf
){
// for each leafs
const
FTreeCoordinate
&
coord
=
octreeIterator
.
getCurrentGlobalCoordinate
();
for
(
int
idxLeaf
=
0
;
idxLeaf
<
leafsNumber
;
++
idxLeaf
){
const
int
shapePosition
=
(
coord
.
getX
()
%
3
)
*
9
+
(
coord
.
getY
()
%
3
)
*
3
+
(
coord
.
getZ
()
%
3
);
const
FTreeCoordinate
&
coord
=
octreeIterator
.
getCurrentGlobalCoordinate
();
const
int
shapePosition
=
(
coord
.
getX
()
%
3
)
*
9
+
(
coord
.
getY
()
%
3
)
*
3
+
(
coord
.
getZ
()
%
3
);
const
int
positionToWork
=
startPosAtShape
[
shapePosition
]
++
;
leafsDataArray
[
positionToWork
].
index
=
octreeIterator
.
getCurrentGlobalIndex
();
leafsDataArray
[
positionToWork
].
coord
=
coord
;
leafsDataArray
[
positionToWork
].
targets
=
octreeIterator
.
getCurrentListTargets
();
leafsDataArray
[
positionToWork
].
sources
=
octreeIterator
.
getCurrentListSrc
();
// For now the cost is simply based on the number of particles
const
FSize
nbPartInLeaf
=
octreeIterator
.
getCurrentListTargets
()
->
getNbParticles
();
workloadBuffer
[
positionToWork
].
amountOfWork
=
nbPartInLeaf
*
nbPartInLeaf
;
ContainerClass
*
neighbors
[
27
];
tree
->
getLeafsNeighbors
(
neighbors
,
octreeIterator
.
getCurrentGlobalCoordinate
(),
LeafIndex
);
for
(
int
idxNeigh
=
0
;
idxNeigh
<
27
;
++
idxNeigh
){
if
(
neighbors
[
idxNeigh
]){
workloadBuffer
[
positionToWork
].
amountOfWork
+=
nbPartInLeaf
*
neighbors
[
idxNeigh
]
->
getNbParticles
();
}
}
workPerShape
[
shapePosition
]
+=
workloadBuffer
[
positionToWork
].
amountOfWork
;
const
int
positionToWork
=
startPosAtShape
[
shapePosition
]
++
;
octreeIterator
.
moveRight
();
leafsDataArray
[
positionToWork
].
index
=
octreeIterator
.
getCurrentGlobalIndex
();
}
leafsDataArray
[
positionToWork
].
coord
=
coord
;
leafsDataArray
[
positionToWork
].
targets
=
octreeIterator
.
getCurrentListTargets
();
leafsDataArray
[
positionToWork
].
sources
=
octreeIterator
.
getCurrentListSrc
();
workloadP2P
.
resize
(
SizeShape
);
// For now the cost is simply based on the number of particles
int
offsetShape
=
0
;
const
FSize
nbPartInLeaf
=
octreeIterator
.
getCurrentListTargets
()
->
getNbParticles
();
workloadBuffer
[
positionToWork
].
amountOfWork
=
nbPartInLeaf
*
nbPartInLeaf
;
ContainerClass
*
neighbors
[
27
];
tree
->
getLeafsNeighbors
(
neighbors
,
octreeIterator
.
getCurrentGlobalCoordinate
(),
LeafIndex
);
for
(
int
idxNeigh
=
0
;
idxNeigh
<
27
;
++
idxNeigh
){
if
(
neighbors
[
idxNeigh
]){
workloadBuffer
[
positionToWork
].
amountOfWork
+=
nbPartInLeaf
*
neighbors
[
idxNeigh
]
->
getNbParticles
();
}
}
workPerShape
[
shapePosition
]
+=
workloadBuffer
[
positionToWork
].
amountOfWork
;
octreeIterator
.
moveRight
();
}
workloadP2P
.
resize
(
SizeShape
);
int
offsetShape
=
0
;
for
(
int
idxShape
=
0
;
idxShape
<
SizeShape
;
++
idxShape
){
std
::
vector
<
std
::
pair
<
int
,
int
>>*
intervals
=
&
workloadP2P
[
idxShape
];
const
int
nbElements
=
shapeLeaves
[
idxShape
];
const
FSize
totalWork
=
workPerShape
[
idxShape
];
// Now split between thread
(
*
intervals
).
resize
(
MaxThreads
);
// Ideally each thread will have this
const
FSize
idealWork
=
(
totalWork
/
MaxThreads
);
// Assign default value for first thread
int
idxThread
=
0
;
(
*
intervals
)[
idxThread
].
first
=
offsetShape
;
FSize
assignWork
=
workloadBuffer
[
0
].
amountOfWork
;
for
(
int
idxElement
=
1
+
offsetShape
;
idxElement
<
nbElements
+
offsetShape
;
++
idxElement
){
if
(
FMath
::
Abs
((
idxThread
+
1
)
*
idealWork
-
assignWork
)
<
FMath
::
Abs
((
idxThread
+
1
)
*
idealWork
-
assignWork
-
workloadBuffer
[
idxElement
].
amountOfWork
)
&&
idxThread
!=
MaxThreads
-
1
){
(
*
intervals
)[
idxThread
].
second
=
idxElement
;
/// FLOG(FLog::Controller << "[Balance] Shape " << idxShape << " Thread " << idxThread << " goes from "
/// << (*intervals)[idxThread].first << " to " << (*intervals)[idxThread].second << "\n");
idxThread
+=
1
;
(
*
intervals
)[
idxThread
].
first
=
idxElement
;
}
assignWork
+=
workloadBuffer
[
idxElement
].
amountOfWork
;
}
(
*
intervals
)[
idxThread
].
second
=
nbElements
+
offsetShape
;
for
(
int
idxShape
=
0
;
idxShape
<
SizeShape
;
++
idxShape
){
std
::
vector
<
std
::
pair
<
int
,
int
>>*
intervals
=
&
workloadP2P
[
idxShape
];
const
int
nbElements
=
shapeLeaves
[
idxShape
];
const
FSize
totalWork
=
workPerShape
[
idxShape
];
// Now split between thread
(
*
intervals
).
resize
(
MaxThreads
);
// Ideally each thread will have this
const
FSize
idealWork
=
(
totalWork
/
MaxThreads
);
// Assign default value for first thread
int
idxThread
=
0
;
(
*
intervals
)[
idxThread
].
first
=
offsetShape
;
FSize
assignWork
=
workloadBuffer
[
0
].
amountOfWork
;
for
(
int
idxElement
=
1
+
offsetShape
;
idxElement
<
nbElements
+
offsetShape
;
++
idxElement
){
if
(
FMath
::
Abs
((
idxThread
+
1
)
*
idealWork
-
assignWork
)
<
FMath
::
Abs
((
idxThread
+
1
)
*
idealWork
-
assignWork
-
workloadBuffer
[
idxElement
].
amountOfWork
)
&&
idxThread
!=
MaxThreads
-
1
){
(
*
intervals
)[
idxThread
].
second
=
idxElement
;
/// FLOG(FLog::Controller << "[Balance] Shape " << idxShape << " Thread " << idxThread << " goes from "
/// FLOG(FLog::Controller << "[Balance] Shape " << idxShape << " Thread " << idxThread << " goes from "
/// << (*intervals)[idxThread].first << " to " << (*intervals)[idxThread].second << "\n");
/// << (*intervals)[idxThread].first << " to " << (*intervals)[idxThread].second << "\n");
idxThread
+=
1
;
(
*
intervals
)[
idxThread
].
first
=
idx
Element
;
offsetShape
+=
nb
Element
s
;
}
}
assignWork
+=
workloadBuffer
[
idxElement
].
amountOfWork
;
}
}
(
*
intervals
)[
idxThread
].
second
=
nbElements
+
offsetShape
;
}
/// FLOG(FLog::Controller << "[Balance] Shape " << idxShape << " Thread " << idxThread << " goes from "
#pragma omp barrier
/// << (*intervals)[idxThread].first << " to " << (*intervals)[idxThread].second << "\n");
}
offsetShape
+=
nbElements
;
for
(
int
idxThread
=
0
;
idxThread
<
MaxThreads
;
++
idxThread
){
}
delete
[]
workloadBufferThread
[
idxThread
];
}
}
}
}
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment