Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
solverstack
ScalFMM
Commits
ff1a2ea1
Commit
ff1a2ea1
authored
Oct 05, 2016
by
Quentin Khan
Browse files
FFmmAlgorithmThread: change to new kernel interface
parent
a51adc22
Changes
1
Hide whitespace changes
Inline
Side-by-side
Src/Core/FFmmAlgorithmThread.hpp
View file @
ff1a2ea1
...
...
@@ -16,6 +16,8 @@
#ifndef FFMMALGORITHMTHREAD_HPP
#define FFMMALGORITHMTHREAD_HPP
#include
<array>
#include
<algorithm>
#include
"../Utils/FAssert.hpp"
#include
"../Utils/FLog.hpp"
...
...
@@ -68,7 +70,7 @@ class FFmmAlgorithmThread : public FAbstractAlgorithm, public FAlgorithmTimers{
public:
/** Class constructor
*
*
* The constructor needs the octree and the kernels used for computation.
* \param inTree the octree to work on.
* \param inKernels the kernels to call.
...
...
@@ -85,7 +87,7 @@ public:
FAssertLF
(
tree
,
"tree cannot be null"
);
FAssertLF
(
leafLevelSeparationCriteria
<
3
,
"Separation criteria should be < 3"
);
FAssertLF
(
0
<
userChunkSize
,
"Chunk size should be > 0"
);
this
->
kernels
=
new
KernelClass
*
[
MaxThreads
];
#pragma omp parallel num_threads(MaxThreads)
{
...
...
@@ -94,13 +96,13 @@ public:
this
->
kernels
[
omp_get_thread_num
()]
=
new
KernelClass
(
*
inKernels
);
}
}
FAbstractAlgorithm
::
setNbLevelsInTree
(
tree
->
getHeight
());
FLOG
(
FLog
::
Controller
<<
"FFmmAlgorithmThread (Max Thread "
<<
omp_get_max_threads
()
<<
")
\n
"
);
FLOG
(
FLog
::
Controller
<<
"
\t
static schedule "
<<
(
userChunkSize
==
-
1
?
"static"
:
(
userChunkSize
==
0
?
"N/p^2"
:
std
::
to_string
(
userChunkSize
)))
<<
")
\n
"
);
}
/** Default destructor */
virtual
~
FFmmAlgorithmThread
(){
for
(
int
idxThread
=
0
;
idxThread
<
MaxThreads
;
++
idxThread
){
...
...
@@ -134,13 +136,13 @@ public:
}
else
{
return
userChunkSize
;
}
}
}
template
<
class
NumType
>
void
setChunkSize
(
const
NumType
size
)
{
userChunkSize
=
size
;
}
protected:
/**
* Runs the complete algorithm.
...
...
@@ -216,7 +218,10 @@ protected:
for
(
int
idxLeafs
=
0
;
idxLeafs
<
leafs
;
++
idxLeafs
){
// We need the current cell that represent the leaf
// and the list of particles
myThreadkernels
->
P2M
(
iterArray
[
idxLeafs
].
getCurrentCell
()
,
iterArray
[
idxLeafs
].
getCurrentListSrc
());
myThreadkernels
->
P2M
(
&
(
iterArray
[
idxLeafs
].
getCurrentCell
()
->
getMultipoleData
()),
iterArray
[
idxLeafs
].
getCurrentCell
(),
iterArray
[
idxLeafs
].
getCurrentListSrc
());
}
}
FLOG
(
computationCounter
.
tac
()
);
...
...
@@ -260,6 +265,7 @@ protected:
octreeIterator
=
avoidGotoLeftIterator
;
// equal octreeIterator.moveUp(); octreeIterator.gotoLeft();
const
int
chunkSize
=
this
->
getChunkSize
(
numberOfCells
);
(
void
)
chunkSize
;
// Used in OpenMP for loop, silence warning
FLOG
(
computationCounter
.
tic
());
#pragma omp parallel num_threads(MaxThreads)
...
...
@@ -267,9 +273,17 @@ protected:
KernelClass
*
const
myThreadkernels
=
kernels
[
omp_get_thread_num
()];
#pragma omp for nowait schedule(dynamic, chunkSize)
for
(
int
idxCell
=
0
;
idxCell
<
numberOfCells
;
++
idxCell
){
// We need the current cell and the child
// child is an array (of 8 child) that may be null
myThreadkernels
->
M2M
(
iterArray
[
idxCell
].
getCurrentCell
()
,
iterArray
[
idxCell
].
getCurrentChild
(),
idxLevel
);
// We need the current cell and its children
using
multipole_t
=
typename
CellClass
::
multipole_t
;
std
::
array
<
multipole_t
*
,
8
>
children_multipoles
;
CellClass
**
children
=
iterArray
[
idxCell
].
getCurrentChildren
();
std
::
transform
(
children
,
children
+
8
,
children_multipoles
.
begin
(),
[](
CellClass
*
c
)
{
return
(
c
==
nullptr
?
nullptr
:
&
(
c
->
getMultipoleData
()));});
myThreadkernels
->
M2M
(
&
(
iterArray
[
idxCell
].
getCurrentCell
()
->
getMultipoleData
()),
children_multipoles
.
data
(),
idxLevel
);
}
}
...
...
@@ -327,6 +341,7 @@ protected:
octreeIterator
=
avoidGotoLeftIterator
;
const
int
chunkSize
=
this
->
getChunkSize
(
numberOfCells
);
(
void
)
chunkSize
;
// Used in OpenMP for loop, silence warning
FLOG
(
computationCounter
.
tic
());
#pragma omp parallel num_threads(MaxThreads)
...
...
@@ -338,7 +353,22 @@ protected:
#pragma omp for schedule(dynamic, chunkSize) nowait
for
(
int
idxCell
=
0
;
idxCell
<
numberOfCells
;
++
idxCell
){
const
int
counter
=
tree
->
getInteractionNeighbors
(
neighbors
,
neighborPositions
,
iterArray
[
idxCell
].
getCurrentGlobalCoordinate
(),
idxLevel
,
separationCriteria
);
if
(
counter
)
myThreadkernels
->
M2L
(
iterArray
[
idxCell
].
getCurrentCell
()
,
neighbors
,
neighborPositions
,
counter
,
idxLevel
);
if
(
counter
)
{
using
multipole_t
=
typename
CellClass
::
multipole_t
;
std
::
array
<
const
multipole_t
*
,
342
>
neighbor_multipoles
;
std
::
transform
(
neighbors
,
neighbors
+
counter
,
neighbor_multipoles
.
begin
(),
[](
const
CellClass
*
c
)
{
return
(
c
==
nullptr
?
nullptr
:
&
(
c
->
getMultipoleData
()));
});
myThreadkernels
->
M2L
(
&
(
iterArray
[
idxCell
].
getCurrentCell
()
->
getLocalExpansionData
()),
neighbor_multipoles
.
data
(),
neighborPositions
,
counter
,
idxLevel
);
}
}
myThreadkernels
->
finishedLevelM2L
(
idxLevel
);
...
...
@@ -384,14 +414,29 @@ protected:
avoidGotoLeftIterator
.
moveDown
();
octreeIterator
=
avoidGotoLeftIterator
;
FLOG
(
computationCounter
.
tic
());
const
int
chunkSize
=
this
->
getChunkSize
(
numberOfCells
);
(
void
)
chunkSize
;
// Used in OpenMP for loop, silence warning
FLOG
(
computationCounter
.
tic
());
#pragma omp parallel num_threads(MaxThreads)
{
KernelClass
*
const
myThreadkernels
=
kernels
[
omp_get_thread_num
()];
#pragma omp for nowait schedule(dynamic, chunkSize)
for
(
int
idxCell
=
0
;
idxCell
<
numberOfCells
;
++
idxCell
){
myThreadkernels
->
L2L
(
iterArray
[
idxCell
].
getCurrentCell
()
,
iterArray
[
idxCell
].
getCurrentChild
(),
idxLevel
);
using
local_expansion_t
=
typename
CellClass
::
local_expansion_t
;
std
::
array
<
local_expansion_t
*
,
8
>
children_expansions
;
CellClass
**
children
=
iterArray
[
idxCell
].
getCurrentChildren
();
std
::
transform
(
children
,
children
+
8
,
children_expansions
.
begin
(),
[](
CellClass
*
c
)
{
return
(
c
==
nullptr
?
nullptr
:
&
(
c
->
getLocalExpansionData
()));
});
myThreadkernels
->
L2L
(
&
(
iterArray
[
idxCell
].
getCurrentCell
()
->
getLocalExpansionData
()),
children_expansions
.
data
(),
idxLevel
);
}
}
FLOG
(
computationCounter
.
tac
());
...
...
@@ -489,7 +534,10 @@ protected:
for
(
int
idxLeafs
=
previous
;
idxLeafs
<
endAtThisShape
;
++
idxLeafs
){
LeafData
&
currentIter
=
leafsDataArray
[
idxLeafs
];
if
(
l2pEnabled
){
myThreadkernels
.
L2P
(
currentIter
.
cell
,
currentIter
.
targets
);
myThreadkernels
.
L2P
(
&
(
currentIter
.
cell
->
getLocalExpansionData
()),
currentIter
.
cell
,
currentIter
.
targets
);
}
if
(
p2pEnabled
){
// need the current particles and neighbors particles
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment