Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
ScalFMM
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
5
Issues
5
List
Boards
Labels
Service Desk
Milestones
Operations
Operations
Incidents
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
solverstack
ScalFMM
Commits
1f69b258
Commit
1f69b258
authored
Mar 30, 2012
by
Matthias Messner
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master' of
git+ssh://scm.gforge.inria.fr//gitroot//scalfmm/scalfmm
parents
57ea27fd
d788edc1
Changes
18
Hide whitespace changes
Inline
Side-by-side
Showing
18 changed files
with
2425 additions
and
34 deletions
+2425
-34
Src/Components/FTestKernels.hpp
Src/Components/FTestKernels.hpp
+3
-2
Src/Core/FFmmAlgorithmStarpu.hpp
Src/Core/FFmmAlgorithmStarpu.hpp
+49
-10
Src/Core/FFmmAlgorithmStarpuGroup.hpp
Src/Core/FFmmAlgorithmStarpuGroup.hpp
+1458
-0
Tests/Kernels/testSphericalAlgorithm.cpp
Tests/Kernels/testSphericalAlgorithm.cpp
+13
-2
Tests/Kernels/testSphericalBlasAlgorithm.cpp
Tests/Kernels/testSphericalBlasAlgorithm.cpp
+11
-1
Tests/Kernels/testSphericalBlockBlasAlgorithm.cpp
Tests/Kernels/testSphericalBlockBlasAlgorithm.cpp
+11
-1
Tests/Kernels/testSphericalEwalAlgorithm.cpp
Tests/Kernels/testSphericalEwalAlgorithm.cpp
+12
-2
Tests/Kernels/testSphericalProcAlgorithm.cpp
Tests/Kernels/testSphericalProcAlgorithm.cpp
+11
-4
Tests/Kernels/testSphericalRotationAlgorithm.cpp
Tests/Kernels/testSphericalRotationAlgorithm.cpp
+11
-1
Tests/Kernels/testSphericalTsmAlgorithm.cpp
Tests/Kernels/testSphericalTsmAlgorithm.cpp
+11
-2
Tests/Kernels/testSphericalTsmNoTsm.cpp
Tests/Kernels/testSphericalTsmNoTsm.cpp
+5
-3
Tests/Kernels/testStarpuAlgorithm.cpp
Tests/Kernels/testStarpuAlgorithm.cpp
+5
-2
Tests/Kernels/testStarpuAlgorithmGroup.cpp
Tests/Kernels/testStarpuAlgorithmGroup.cpp
+172
-0
Tests/Kernels/testStarpuChebAlgorithm.cpp
Tests/Kernels/testStarpuChebAlgorithm.cpp
+6
-0
Tests/Kernels/testStarpuCompareAlgorithm.cpp
Tests/Kernels/testStarpuCompareAlgorithm.cpp
+299
-0
Tests/Kernels/testStarpuGroupCompareAlgorithm.cpp
Tests/Kernels/testStarpuGroupCompareAlgorithm.cpp
+337
-0
Tests/Kernels/testStarpuSphericalAlgorithm.cpp
Tests/Kernels/testStarpuSphericalAlgorithm.cpp
+5
-2
Tests/Kernels/testTuneSphericalBlockBlas.cpp
Tests/Kernels/testTuneSphericalBlockBlas.cpp
+6
-2
No files found.
Src/Components/FTestKernels.hpp
View file @
1f69b258
...
...
@@ -150,7 +150,8 @@ void ValidateFMMAlgo(OctreeClass* const tree){
octreeIterator
.
gotoBottomLeft
();
do
{
if
(
octreeIterator
.
getCurrentCell
()
->
getDataUp
()
!=
octreeIterator
.
getCurrentListSrc
()
->
getSize
()
){
std
::
cout
<<
"Problem P2M : "
<<
(
octreeIterator
.
getCurrentCell
()
->
getDataUp
()
-
octreeIterator
.
getCurrentListSrc
()
->
getSize
())
<<
"
\n
"
;
std
::
cout
<<
"Problem P2M : "
<<
octreeIterator
.
getCurrentCell
()
->
getDataUp
()
<<
" (should be "
<<
octreeIterator
.
getCurrentListSrc
()
->
getSize
()
<<
")
\n
"
;
}
NbPart
+=
octreeIterator
.
getCurrentListSrc
()
->
getSize
();
}
while
(
octreeIterator
.
moveRight
());
...
...
@@ -194,7 +195,7 @@ void ValidateFMMAlgo(OctreeClass* const tree){
// there is a problem
if
(
(
!
isUsingTsm
&&
iter
.
data
().
getDataDown
()
!=
NbPart
-
1
)
||
(
isUsingTsm
&&
iter
.
data
().
getDataDown
()
!=
NbPart
)
){
std
::
cout
<<
"Problem L2P + P2P : "
<<
iter
.
data
().
getDataDown
()
<<
"
\n
"
;
std
::
cout
<<
"Problem L2P + P2P : "
<<
iter
.
data
().
getDataDown
()
<<
"
("
<<
octreeIterator
.
getCurrentGlobalIndex
()
<<
")
\n
"
;
}
iter
.
gotoNext
();
}
...
...
Src/Core/FFmmAlgorithmStarpu.hpp
View file @
1f69b258
...
...
@@ -43,8 +43,8 @@ struct StarHandle : public FNoCopyable, public FNoAssignement {
/** Release the handle */
~
StarHandle
(){
if
(
handle
!=
((
void
*
)
0
)
){
//
starpu_data_unregister(handle);
if
(
handle
!=
starpu_data_handle_t
(
0
)
){
starpu_data_unregister
(
handle
);
}
}
...
...
@@ -69,7 +69,7 @@ struct StarHandle : public FNoCopyable, public FNoAssignement {
/** Release data */
void
unregisterData
(){
if
(
handle
!=
((
void
*
)
0
)
){
//
starpu_data_unregister(handle);
starpu_data_unregister
(
handle
);
memset
(
&
handle
,
0
,
sizeof
(
starpu_data_handle_t
));
}
}
...
...
@@ -259,6 +259,7 @@ class FFmmAlgorithmStarpu : protected FAssertable{
KernelClass
*
const
kernels
;
//< The kernels
const
int
OctreeHeight
;
const
bool
putNameInTask
;
//////////////////////////////////////////////////////////////////
// Codelets
...
...
@@ -280,7 +281,7 @@ class FFmmAlgorithmStarpu : protected FAssertable{
starpu_perfmodel
l2p_model
;
// Init the codelet
void
initCodelets
(
const
bool
putNameInTask
){
void
initCodelets
(){
memset
(
&
p2p_model
,
0
,
sizeof
(
p2p_model
));
p2p_model
.
type
=
STARPU_HISTORY_BASED
;
p2p_model
.
symbol
=
"P2P"
;
...
...
@@ -341,6 +342,8 @@ class FFmmAlgorithmStarpu : protected FAssertable{
l2p_cl
.
where
=
STARPU_CPU
;
l2p_cl
.
cpu_funcs
[
0
]
=
l2p_cpu
;
l2p_cl
.
nbuffers
=
2
;
l2p_cl
.
modes
[
0
]
=
STARPU_R
;
l2p_cl
.
modes
[
1
]
=
STARPU_RW
;
if
(
putNameInTask
)
l2p_cl
.
model
=
&
l2p_model
;
// M2M & L2L
...
...
@@ -400,6 +403,29 @@ class FFmmAlgorithmStarpu : protected FAssertable{
}
void
releaseHandles
(){
typename
OctreeClass
::
Iterator
octreeIterator
(
tree
);
octreeIterator
.
gotoBottomLeft
();
typename
OctreeClass
::
Iterator
avoidGotoLeftIterator
(
octreeIterator
);
// init leaf handle
do
{
octreeIterator
.
getCurrentLeaf
()
->
getSrc
()
->
handle
.
unregisterData
();
if
(
octreeIterator
.
getCurrentLeaf
()
->
getSrc
()
!=
octreeIterator
.
getCurrentLeaf
()
->
getTargets
()){
octreeIterator
.
getCurrentLeaf
()
->
getTargets
()
->
handle
.
unregisterData
();
}
}
while
(
octreeIterator
.
moveRight
());
octreeIterator
=
avoidGotoLeftIterator
;
// init cells handle
for
(
int
idxLevel
=
OctreeHeight
-
1
;
idxLevel
>
1
;
--
idxLevel
){
do
{
octreeIterator
.
getCurrentCell
()
->
handleUp
.
unregisterData
();
octreeIterator
.
getCurrentCell
()
->
handleDown
.
unregisterData
();
}
while
(
octreeIterator
.
moveRight
());
avoidGotoLeftIterator
.
moveUp
();
octreeIterator
=
avoidGotoLeftIterator
;
}
}
//////////////////////////////////////////////////////////////////
...
...
@@ -433,20 +459,33 @@ public:
* @param inKernels the kernels to call
* An assert is launched if one of the arguments is null
*/
FFmmAlgorithmStarpu
(
OctreeClass
*
const
inTree
,
KernelClass
*
const
inKernels
,
const
bool
putNameInTask
=
false
)
:
tree
(
inTree
)
,
kernels
(
inKernels
),
OctreeHeight
(
tree
->
getHeight
())
{
FFmmAlgorithmStarpu
(
OctreeClass
*
const
inTree
,
KernelClass
*
const
inKernels
,
const
bool
inPutNameInTask
=
false
)
:
tree
(
inTree
)
,
kernels
(
inKernels
),
OctreeHeight
(
tree
->
getHeight
()),
putNameInTask
(
inPutNameInTask
)
{
FDEBUG
(
FDebug
::
Controller
<<
"FFmmAlgorithmStarpu
\n
"
);
}
/** Default destructor */
virtual
~
FFmmAlgorithmStarpu
(){
}
/** Run starpu */
void
initStarpu
(
const
int
nbThreads
=
-
1
){
starpu_conf
setup
;
starpu_conf_init
(
&
setup
);
setup
.
ncpus
=
nbThreads
;
// Run starpu
starpu_init
(
NULL
);
starpu_init
(
&
setup
);
FDEBUG
(
FDebug
::
Controller
<<
"Init starpu, there are "
<<
starpu_worker_get_count
()
<<
" workers
\n
"
);
// Init
initCodelets
(
putNameInTask
);
initCodelets
();
initHandles
();
initKernels
();
}
/**
Default destructor
*/
v
irtual
~
FFmmAlgorithm
Starpu
(){
/**
Release starpu
*/
v
oid
release
Starpu
(){
// Release stuff
releaseCodelets
();
releaseHandles
();
...
...
Src/Core/FFmmAlgorithmStarpuGroup.hpp
0 → 100644
View file @
1f69b258
// ===================================================================================
// Logiciel initial: ScalFmm Version 0.5
// Co-auteurs : Olivier Coulaud, Bérenger Bramas.
// Propriétaires : INRIA.
// Copyright © 2011-2012, diffusé sous les termes et conditions d’une licence propriétaire.
// Initial software: ScalFmm Version 0.5
// Co-authors: Olivier Coulaud, Bérenger Bramas.
// Owners: INRIA.
// Copyright © 2011-2012, spread under the terms and conditions of a proprietary license.
// ===================================================================================
#ifndef FFMMALGORITHMSTARPUGROUP_HPP
#define FFMMALGORITHMSTARPUGROUP_HPP
#include "../Utils/FAssertable.hpp"
#include "../Utils/FDebug.hpp"
#include "../Utils/FTrace.hpp"
#include "../Utils/FTic.hpp"
#include "../Utils/FGlobal.hpp"
#include "../Utils/FMemUtils.hpp"
#include "../Containers/FOctree.hpp"
#include "../Containers/FBoolArray.hpp"
#include "../Extensions/FExtendCoordinate.hpp"
#include "../Extensions/FExtendMortonIndex.hpp"
#include <starpu.h>
/*
TODO:
split multipole/local
*/
/**
* @author Berenger Bramas (berenger.bramas@inria.fr)
* @class FFmmAlgorithmStarpuGroup
* @brief
* Please read the license
*/
template
<
class
OctreeClass
,
class
ParticleClass
,
class
CellClass
,
class
ContainerClass
,
class
KernelClass
,
class
LeafClass
>
class
FFmmAlgorithmStarpuGroup
:
protected
FAssertable
{
/////////////////////////////////////////////////////////////
// Utils classes
/////////////////////////////////////////////////////////////
/** A particle container tagged with a Morton index and a coordinate
  * (both inherited from the extension classes).
  * buildGroups() fills one of these per leaf from the octree.
  */
struct MortonContainer : public FExtendMortonIndex, public FExtendCoordinate {
    // Copy of the leaf's source particle list
    ContainerClass container;
};
/** Data properties a cell/leaf needs in order to finish its computation.
  * All three values default to 0.
  */
struct TransferProperties {
    explicit TransferProperties(const int inIndex = 0,
                                const int inPosition = 0,
                                const int inDataPos = 0)
        : indexWhoNeedsData(inIndex),
          positionInComputationArray(inPosition),
          positionInDataArray(inDataPos) {
    }

    // Index, inside the destination group, of the element that needs the data
    int indexWhoNeedsData;
    // Slot of the computation array where the data has to be put
    int positionInComputationArray;
    // Position in the data array the data is read from
    int positionInDataArray;
};
/** The transfer buffer holds many properties:
  * it has enough information to create a copy task and
  * a process task.
  */
struct TransferBuffer {
    // Every scalar member is zero-initialised so that a freshly built
    // buffer never exposes an indeterminate index.
    TransferBuffer() : groupDestination(0), indexToStarCopying(0) {
    }

    // The group that needs the data
    int groupDestination;
    // Positions in the original group
    FVector<int> originalIndexPosition;
    // Transfer properties
    FVector<TransferProperties> compuationProperties;
    // Where the data will be copied
    int indexToStarCopying;
};
/** A group contains several cells
* and some properties
*/
struct
Group
{
Group
()
:
cellArray
(
0
),
needOther
(
0
),
leavesArray
(
0
),
transferBufferCell
(
0
),
nbCellToReceive
(
0
),
transferBufferLeaf
(
0
),
nbLeafToReceive
(
0
)
{
handleCellArrayUp
=
0
;
handleCellArrayDown
=
0
;
handleLeafArray
=
0
;
handleLeafArrayRead
=
0
;
handleTransferCell
=
0
;
handleTransferLeaf
=
0
;
}
~
Group
(){
delete
[]
cellArray
;
delete
[]
needOther
;
delete
[]
leavesArray
;
for
(
int
idx
=
0
;
idx
<
dataToSend
.
getSize
()
;
++
idx
){
delete
dataToSend
[
idx
];
}
delete
[]
transferBufferCell
;
delete
[]
transferBufferLeaf
;
if
(
handleCellArrayUp
!=
starpu_data_handle_t
(
0
))
starpu_data_unregister
(
handleCellArrayUp
);
if
(
handleCellArrayDown
!=
starpu_data_handle_t
(
0
))
starpu_data_unregister
(
handleCellArrayDown
);
if
(
handleLeafArray
!=
starpu_data_handle_t
(
0
))
starpu_data_unregister
(
handleLeafArray
);
if
(
handleLeafArrayRead
!=
starpu_data_handle_t
(
0
))
starpu_data_unregister
(
handleLeafArrayRead
);
if
(
handleTransferCell
!=
starpu_data_handle_t
(
0
))
starpu_data_unregister
(
handleTransferCell
);
if
(
handleTransferLeaf
!=
starpu_data_handle_t
(
0
))
starpu_data_unregister
(
handleTransferLeaf
);
}
// Morton index the group start at
MortonIndex
beginIndex
;
// Morton index the group end at
MortonIndex
endIndex
;
// Number of elements in the group, usually GroupSize
int
nbElements
;
// The data of the group
CellClass
*
FRestrict
cellArray
;
bool
*
needOther
;
// Or the leaves data
MortonContainer
*
FRestrict
leavesArray
;
// Information needed to compute parent child operations
int
indexOfStartInLowerGroups
;
FVector
<
Group
*>
lowerGroups
;
// Information needed in case of transfering data needed
FVector
<
TransferBuffer
*>
dataToSend
;
// memory to copy before compute remotly
CellClass
*
FRestrict
transferBufferCell
;
int
nbCellToReceive
;
// memory to copy before compute remotly
MortonContainer
*
FRestrict
transferBufferLeaf
;
int
nbLeafToReceive
;
// Starpu data
starpu_data_handle_t
handleCellArrayUp
;
starpu_data_handle_t
handleCellArrayDown
;
starpu_data_handle_t
handleLeafArray
;
starpu_data_handle_t
handleLeafArrayRead
;
starpu_data_handle_t
handleTransferCell
;
starpu_data_handle_t
handleTransferLeaf
;
};
//////////////////////////////////////////////////////////////////
// Init Kernels
//////////////////////////////////////////////////////////////////
// Allocate the per-worker kernel table: one slot per StarPU worker,
// each CPU worker receives its own copy of the reference kernel and
// the slots of the other workers stay null.
void initKernels() {
    const unsigned int nbWorkers = starpu_worker_get_count();

    globalKernels = new KernelClass*[nbWorkers];
    memset(globalKernels, 0, sizeof(KernelClass*) * nbWorkers);

    for (unsigned int idxWorker = 0; idxWorker < nbWorkers; ++idxWorker) {
        if (starpu_worker_get_type(idxWorker) != STARPU_CPU_WORKER) {
            continue;
        }
        globalKernels[idxWorker] = new KernelClass(*kernel);
    }
}
// Destroy every per-worker kernel, then the table that holds them.
// Null slots (workers without a kernel) are harmless to delete.
void releaseKernels() {
    const unsigned int nbWorkers = starpu_worker_get_count();
    for (unsigned int idxWorker = 0; idxWorker < nbWorkers; ++idxWorker) {
        delete globalKernels[idxWorker];
    }
    delete[] globalKernels;
}
/////////////////////////////////////////////////////////////
// Attributes
/////////////////////////////////////////////////////////////
OctreeClass* const tree;        //< The octree to work on
const int OctreeHeight;         //< Height of the tree
const int BlockSize;            //< Size of the block
Group** const blockedTree;      //< Current block tree (one array of groups per level)
int* const blockedPerLevel;     //< Number of blocks per level
KernelClass* const kernel;      //< The kernel
const bool useStarpuPerfModel;  //< To know if the perf model has to be used

// Number of M2M/L2L codelet variants; variant i is set up with i + 2 buffers
static const int MaxChild = 9;

// StarPU codelets, configured by initCodelet()
starpu_codelet p2m_cl;
starpu_codelet p2p_cl;
starpu_codelet p2p_restore_cl;
starpu_codelet m2m_cl[MaxChild];
starpu_codelet m2l_cl;
starpu_codelet m2l_other_cl;
starpu_codelet m2l_copy_cl;
starpu_codelet l2l_cl[MaxChild];
starpu_codelet l2p_cl;

// StarPU performance models, attached to the codelets when useStarpuPerfModel is true
starpu_perfmodel p2p_model;
starpu_perfmodel p2p_restore_model;
starpu_perfmodel p2m_model;
starpu_perfmodel m2m_model;
starpu_perfmodel m2l_model;
starpu_perfmodel m2l_other_model;
starpu_perfmodel m2l_copy_model;
starpu_perfmodel l2l_model;
starpu_perfmodel l2p_model;
void
initCodelet
(){
// init perf model
memset
(
&
p2p_model
,
0
,
sizeof
(
p2p_model
));
p2p_model
.
type
=
STARPU_HISTORY_BASED
;
p2p_model
.
symbol
=
"P2P"
;
memset
(
&
p2p_restore_model
,
0
,
sizeof
(
p2p_restore_model
));
p2p_restore_model
.
type
=
STARPU_HISTORY_BASED
;
p2p_restore_model
.
symbol
=
"P2P Restore"
;
memset
(
&
p2m_model
,
0
,
sizeof
(
p2m_model
));
p2m_model
.
type
=
STARPU_HISTORY_BASED
;
p2m_model
.
symbol
=
"P2M"
;
memset
(
&
m2l_model
,
0
,
sizeof
(
m2l_model
));
m2l_model
.
type
=
STARPU_HISTORY_BASED
;
m2l_model
.
symbol
=
"M2L"
;
memset
(
&
m2l_other_model
,
0
,
sizeof
(
m2l_other_model
));
m2l_other_model
.
type
=
STARPU_HISTORY_BASED
;
m2l_other_model
.
symbol
=
"M2L Other"
;
memset
(
&
m2l_copy_model
,
0
,
sizeof
(
m2l_model
));
m2l_copy_model
.
type
=
STARPU_HISTORY_BASED
;
m2l_copy_model
.
symbol
=
"M2L Copy"
;
memset
(
&
l2p_model
,
0
,
sizeof
(
l2p_model
));
l2p_model
.
type
=
STARPU_HISTORY_BASED
;
l2p_model
.
symbol
=
"L2P"
;
memset
(
&
l2l_model
,
0
,
sizeof
(
l2l_model
));
l2l_model
.
type
=
STARPU_HISTORY_BASED
;
l2l_model
.
symbol
=
"L2L"
;
memset
(
&
m2m_model
,
0
,
sizeof
(
m2m_model
));
m2m_model
.
type
=
STARPU_HISTORY_BASED
;
m2m_model
.
symbol
=
"M2M"
;
// P2M
memset
(
&
p2m_cl
,
0
,
sizeof
(
p2m_cl
));
p2m_cl
.
where
=
STARPU_CPU
;
p2m_cl
.
cpu_funcs
[
0
]
=
p2m_cpu
;
p2m_cl
.
nbuffers
=
2
;
p2m_cl
.
modes
[
0
]
=
STARPU_W
;
p2m_cl
.
modes
[
1
]
=
STARPU_R
;
if
(
useStarpuPerfModel
)
p2m_cl
.
model
=
&
p2m_model
;
// P2P
memset
(
&
p2p_cl
,
0
,
sizeof
(
starpu_codelet
)
);
p2p_cl
.
where
=
STARPU_CPU
;
p2p_cl
.
cpu_funcs
[
0
]
=
p2p_cpu
;
p2p_cl
.
nbuffers
=
2
;
p2p_cl
.
modes
[
0
]
=
STARPU_RW
;
p2p_cl
.
modes
[
1
]
=
STARPU_RW
;
if
(
useStarpuPerfModel
)
p2p_cl
.
model
=
&
p2p_model
;
// P2P restore
memset
(
&
p2p_restore_cl
,
0
,
sizeof
(
starpu_codelet
)
);
p2p_restore_cl
.
where
=
STARPU_CPU
;
p2p_restore_cl
.
cpu_funcs
[
0
]
=
p2p_restore_cpu
;
p2p_restore_cl
.
nbuffers
=
2
;
p2p_restore_cl
.
modes
[
0
]
=
STARPU_RW
;
p2p_restore_cl
.
modes
[
1
]
=
STARPU_R
;
if
(
useStarpuPerfModel
)
p2p_restore_cl
.
model
=
&
p2p_restore_model
;
// L2P
memset
(
&
l2p_cl
,
0
,
sizeof
(
l2p_cl
));
l2p_cl
.
where
=
STARPU_CPU
;
l2p_cl
.
cpu_funcs
[
0
]
=
l2p_cpu
;
l2p_cl
.
nbuffers
=
2
;
l2p_cl
.
modes
[
0
]
=
STARPU_R
;
l2p_cl
.
modes
[
1
]
=
STARPU_RW
;
if
(
useStarpuPerfModel
)
l2p_cl
.
model
=
&
l2p_model
;
// M2L
memset
(
&
m2l_cl
,
0
,
sizeof
(
starpu_codelet
)
);
m2l_cl
.
where
=
STARPU_CPU
;
m2l_cl
.
cpu_funcs
[
0
]
=
m2l_cpu
;
m2l_cl
.
nbuffers
=
2
;
m2l_cl
.
modes
[
0
]
=
STARPU_RW
;
m2l_cl
.
modes
[
1
]
=
STARPU_R
;
if
(
useStarpuPerfModel
)
m2l_cl
.
model
=
&
m2l_model
;
// M2L other
memset
(
&
m2l_other_cl
,
0
,
sizeof
(
starpu_codelet
)
);
m2l_other_cl
.
where
=
STARPU_CPU
;
m2l_other_cl
.
cpu_funcs
[
0
]
=
m2l_other_cpu
;
m2l_other_cl
.
nbuffers
=
2
;
m2l_other_cl
.
modes
[
0
]
=
STARPU_RW
;
m2l_other_cl
.
modes
[
1
]
=
STARPU_R
;
if
(
useStarpuPerfModel
)
m2l_other_cl
.
model
=
&
m2l_other_model
;
// M2L copy
memset
(
&
m2l_copy_cl
,
0
,
sizeof
(
starpu_codelet
)
);
m2l_copy_cl
.
where
=
STARPU_CPU
;
m2l_copy_cl
.
cpu_funcs
[
0
]
=
m2l_copy_cpu
;
m2l_copy_cl
.
nbuffers
=
2
;
m2l_copy_cl
.
modes
[
0
]
=
STARPU_RW
;
m2l_copy_cl
.
modes
[
1
]
=
STARPU_R
;
if
(
useStarpuPerfModel
)
m2l_copy_cl
.
model
=
&
m2l_copy_model
;
// M2M & L2L
memset
(
m2m_cl
,
0
,
sizeof
(
starpu_codelet
)
*
MaxChild
);
memset
(
l2l_cl
,
0
,
sizeof
(
starpu_codelet
)
*
MaxChild
);
for
(
int
idxChild
=
0
;
idxChild
<
MaxChild
;
++
idxChild
){
m2m_cl
[
idxChild
].
where
=
STARPU_CPU
;
m2m_cl
[
idxChild
].
cpu_funcs
[
0
]
=
m2m_cpu
;
m2m_cl
[
idxChild
].
nbuffers
=
idxChild
+
2
;
m2m_cl
[
idxChild
].
modes
[
0
]
=
STARPU_W
;
if
(
useStarpuPerfModel
)
m2m_cl
[
idxChild
].
model
=
&
m2m_model
;
l2l_cl
[
idxChild
].
where
=
STARPU_CPU
;
l2l_cl
[
idxChild
].
cpu_funcs
[
0
]
=
l2l_cpu
;
l2l_cl
[
idxChild
].
nbuffers
=
idxChild
+
2
;
l2l_cl
[
idxChild
].
modes
[
0
]
=
STARPU_R
;
if
(
useStarpuPerfModel
)
l2l_cl
[
idxChild
].
model
=
&
l2l_model
;
for
(
int
idxMode
=
0
;
idxMode
<=
idxChild
;
++
idxMode
){
m2m_cl
[
idxChild
].
modes
[
idxMode
+
1
]
=
STARPU_R
;
l2l_cl
[
idxChild
].
modes
[
idxMode
+
1
]
=
STARPU_RW
;
}
}
}
public:
/** The constructor needs the octree and the kernel used for computation
  * @param inTree the octree to work on
  * @param inKernel the kernel to call
  * @param inBlockedSize number of elements per group (must be positive)
  * @param inUseStarpuPerfModel true to attach the StarPU performance models to the codelets
  * An assert is launched if one of the pointer arguments is null
  * or if the block size is not positive
  */
FFmmAlgorithmStarpuGroup(OctreeClass* const inTree, KernelClass* const inKernel,
                         const int inBlockedSize = 250, const bool inUseStarpuPerfModel = false)
    // The height is read through a guarded access so that a null tree
    // reaches the assertion below instead of being dereferenced here.
    : tree(inTree), OctreeHeight(inTree ? inTree->getHeight() : 0),
      BlockSize(inBlockedSize),
      blockedTree(new Group*[OctreeHeight + 1]),
      blockedPerLevel(new int[OctreeHeight + 1]),
      kernel(inKernel), useStarpuPerfModel(inUseStarpuPerfModel) {

    fassert(tree, "tree cannot be null", __LINE__, __FILE__);
    fassert(kernel, "kernel cannot be null", __LINE__, __FILE__);
    // buildGroups() divides by BlockSize to compute the number of groups
    fassert(BlockSize > 0, "block size must be positive", __LINE__, __FILE__);

    // No group exists until buildGroups() is called
    memset(blockedTree, 0, sizeof(Group*) * (OctreeHeight + 1));
    memset(blockedPerLevel, 0, (OctreeHeight + 1) * sizeof(int));

    FDEBUG(FDebug::Controller << "FFmmAlgorithmStarpuGroup (Block size = " << BlockSize << ")\n");
}
/** Destructor: releases the two per-level tables allocated by the constructor.
  * NOTE(review): the Group arrays stored in blockedTree[level] by buildGroups()
  * are not freed here; presumably they are released elsewhere - confirm.
  */
virtual ~FFmmAlgorithmStarpuGroup() {
    delete[] blockedTree;
    delete[] blockedPerLevel;
}
/////////////////////////////////////////////////////////////
// Tree to group functions
/////////////////////////////////////////////////////////////
/**
*/
void
buildGroups
(
const
int
nbThreads
=
-
1
){
FTRACE
(
FTrace
::
FFunction
functionTrace
(
__FUNCTION__
,
"Fmm"
,
__FILE__
,
__LINE__
)
);
starpu_conf
setup
;
starpu_conf_init
(
&
setup
);
setup
.
ncpus
=
nbThreads
;
// Run starpu
starpu_init
(
&
setup
);
FDEBUG
(
FDebug
::
Controller
<<
"Start starpu runtime, Nb Workers = "
<<
starpu_worker_get_count
()
<<
"
\n
"
);
// create codelet
initCodelet
();
// create kernel for all thread
initKernels
();
// Count leaf to allocate and big array
typename
OctreeClass
::
Iterator
*
iterArray
=
0
;
{
int
leafsNumber
=
0
;
typename
OctreeClass
::
Iterator
octreeIterator
(
tree
);
octreeIterator
.
gotoBottomLeft
();
do
{
++
leafsNumber
;
}
while
(
octreeIterator
.
moveRight
());
iterArray
=
new
typename
OctreeClass
::
Iterator
[
leafsNumber
];
fassert
(
iterArray
,
"iterArray bad alloc"
,
__LINE__
,
__FILE__
);
}
FDEBUG
(
FDebug
::
Controller
<<
"
\t
Copy the tree
\n
"
;
);
// Then we start creating the block
{
typename
OctreeClass
::
Iterator
octreeIterator
(
tree
);
typename
OctreeClass
::
Iterator
avoidGotLeftIterator
(
octreeIterator
);
for
(
int
idxLevel
=
1
;
idxLevel
<
OctreeHeight
;
++
idxLevel
){
// put every thing in the array
int
counterAtLevel
=
0
;
do
{
iterArray
[
counterAtLevel
++
]
=
octreeIterator
;
}
while
(
octreeIterator
.
moveRight
());
avoidGotLeftIterator
.
moveDown
();
octreeIterator
=
avoidGotLeftIterator
;
// find the number of groups
const
int
NbGroups
=
(
counterAtLevel
+
BlockSize
-
1
)
/
BlockSize
;
FDEBUG
(
FDebug
::
Controller
<<
"
\t\t
At level "
<<
idxLevel
<<
" there are "
<<
NbGroups
<<
" groups
\n
"
;
);
blockedPerLevel
[
idxLevel
]
=
NbGroups
;
blockedTree
[
idxLevel
]
=
new
Group
[
NbGroups
];
// copy data to group
int
copyIndex
=
0
;
for
(
int
idxGroup
=
0
;
idxGroup
<
NbGroups
;
++
idxGroup
){
const
int
cellsInThisGroup
=
FMath
::
Min
(
BlockSize
,
counterAtLevel
-
copyIndex
);
blockedTree
[
idxLevel
][
idxGroup
].
nbElements
=
cellsInThisGroup
;
blockedTree
[
idxLevel
][
idxGroup
].
cellArray
=
new
CellClass
[
cellsInThisGroup
];
blockedTree
[
idxLevel
][
idxGroup
].
needOther
=
new
bool
[
cellsInThisGroup
];
// starpu
starpu_vector_data_register
(
&
blockedTree
[
idxLevel
][
idxGroup
].
handleCellArrayUp
,
0
,
(
uintptr_t
)
blockedTree
[
idxLevel
][
idxGroup
].
cellArray
,
blockedTree
[
idxLevel
][
idxGroup
].
nbElements
,
sizeof
(
CellClass
));
starpu_vector_data_register
(
&
blockedTree
[
idxLevel
][
idxGroup
].
handleCellArrayDown
,
0
,
(
uintptr_t
)
blockedTree
[
idxLevel
][
idxGroup
].
cellArray
,
blockedTree
[
idxLevel
][
idxGroup
].
nbElements
,
sizeof
(
CellClass
));
for
(
int
idxCell
=
0
;
idxCell
<
cellsInThisGroup
;
++
idxCell
,
++
copyIndex
){
blockedTree
[
idxLevel
][
idxGroup
].
cellArray
[
idxCell
].
setMortonIndex
(
iterArray
[
copyIndex
].
getCurrentGlobalIndex
()
);
blockedTree
[
idxLevel
][
idxGroup
].
cellArray
[
idxCell
].
setCoordinate
(
iterArray
[
copyIndex
].
getCurrentGlobalCoordinate
()
);
blockedTree
[
idxLevel
][
idxGroup
].
needOther
[
idxCell
]
=
false
;
blockedTree
[
idxLevel
][
idxGroup
].
cellArray
[
idxCell
].
intialCopy
(
iterArray
[
copyIndex
].
getCurrentCell
()
);
}
blockedTree
[
idxLevel
][
idxGroup
].
beginIndex
=
blockedTree
[
idxLevel
][
idxGroup
].
cellArray
[
0
].
getMortonIndex
();
blockedTree
[
idxLevel
][
idxGroup
].
endIndex
=
blockedTree
[
idxLevel
][
idxGroup
].
cellArray
[
cellsInThisGroup
-
1
].
getMortonIndex
();
}
}
// leaf level will have the same groups has head cell level
const
int
NbGroups
=
blockedPerLevel
[
OctreeHeight
-
1
];
blockedPerLevel
[
OctreeHeight
]
=
NbGroups
;
blockedTree
[
OctreeHeight
]
=
new
Group
[
NbGroups
];
int
copyIndex
=
0
;
for
(
int
idxGroup
=
0
;
idxGroup
<
NbGroups
;
++
idxGroup
){
blockedTree
[
OctreeHeight
][
idxGroup
].
nbElements
=
blockedTree
[
OctreeHeight
-
1
][
idxGroup
].
nbElements
;
blockedTree
[
OctreeHeight
][
idxGroup
].
beginIndex
=
blockedTree
[
OctreeHeight
-
1
][
idxGroup
].
beginIndex
;
blockedTree
[
OctreeHeight
][
idxGroup
].
endIndex
=
blockedTree
[
OctreeHeight
-
1
][
idxGroup
].
endIndex
;
const
int
NbLeaves
=
blockedTree
[
OctreeHeight
][
idxGroup
].
nbElements
;
blockedTree
[
OctreeHeight
][
idxGroup
].
leavesArray
=
new
MortonContainer
[
NbLeaves
];
// starpu
starpu_vector_data_register
(
&
blockedTree
[
OctreeHeight
][
idxGroup
].
handleLeafArray
,
0
,
(
uintptr_t
)
blockedTree
[
OctreeHeight
][
idxGroup
].
leavesArray
,
NbLeaves
,
sizeof
(
MortonContainer
));
starpu_vector_data_register
(
&
blockedTree
[
OctreeHeight
][
idxGroup
].
handleLeafArrayRead
,
0
,
(
uintptr_t
)
blockedTree
[
OctreeHeight
][
idxGroup
].
leavesArray
,
NbLeaves
,
sizeof
(
MortonContainer
));
for
(
int
idxLeaf
=
0
;
idxLeaf
<
NbLeaves
;
++
idxLeaf
,
++
copyIndex
){
blockedTree
[
OctreeHeight
][
idxGroup
].
leavesArray
[
idxLeaf
].
container
=
*
iterArray
[
copyIndex
].
getCurrentListSrc
();
blockedTree
[
OctreeHeight
][
idxGroup
].
leavesArray
[
idxLeaf
].
setMortonIndex
(
iterArray
[
copyIndex
].
getCurrentGlobalIndex
());
blockedTree
[
OctreeHeight
][
idxGroup
].
leavesArray
[
idxLeaf
].
setCoordinate
(
iterArray
[
copyIndex
].
getCurrentGlobalCoordinate
());
}
}
}
delete
[]
iterArray
;
iterArray
=
0
;
FDEBUG
(
FDebug
::
Controller
<<
"
\t
Prepare child parent relations
\n
"
;
);
// All block has been created, now find the parent-child relations
{
for
(
int
idxLevel
=
1
;
idxLevel
<
OctreeHeight
-
1
;
++
idxLevel
){
int
currentLowerGroup
=
0
;
FDEBUG
(
FReal
totalDependencies
=
0
);
// find the number of groups
const
int
NbGroups
=
blockedPerLevel
[
idxLevel
];
for
(
int
idxGroup
=
0
;
idxGroup
<
NbGroups
;
++
idxGroup
){
// copy current group
blockedTree
[
idxLevel
][
idxGroup
].
lowerGroups
.
push
(
&
blockedTree
[
idxLevel
+
1
][
currentLowerGroup
]
);
{
int
startIndex
=
0
;
while
(
(
blockedTree
[
idxLevel
+
1
][
currentLowerGroup
].
cellArray
[
startIndex
].
getMortonIndex
()
>>
3
)
!=
blockedTree
[
idxLevel
][
idxGroup
].
beginIndex
){
++
startIndex
;
}
blockedTree
[
idxLevel
][
idxGroup
].
indexOfStartInLowerGroups
=
startIndex
;
}
if
((
blockedTree
[
idxLevel
+
1
][
currentLowerGroup
].
endIndex
>>
3
)
<=
blockedTree
[
idxLevel
][
idxGroup
].
endIndex
){
++
currentLowerGroup
;
}
// copy until too much on the right