Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
solverstack
ScalFMM
Commits
1f77bf7c
Commit
1f77bf7c
authored
Mar 15, 2015
by
BRAMAS Berenger
Browse files
Update cuda starpu to use a real class POD
parent
df70cc33
Changes
12
Expand all
Hide whitespace changes
Inline
Side-by-side
Src/GroupTree/Cuda/FCudaDeviceWrapper.cu
View file @
1f77bf7c
This diff is collapsed.
Click to expand it.
Src/GroupTree/Cuda/FCudaDeviceWrapper.hpp
View file @
1f77bf7c
...
...
@@ -6,54 +6,54 @@
#include
"../../Utils/FGlobal.hpp"
#include
"../FOutOfBlockInteraction.hpp"
template
<
class
CellContainerClass
,
class
ParticleContainerGroupClass
,
class
ParticleGroupClass
,
class
CudaKernelClass
>
template
<
class
CellClass
,
class
CellContainerClass
,
class
ParticleContainerGroupClass
,
class
ParticleGroupClass
,
class
CudaKernelClass
>
void
FCuda__bottomPassCallback
(
unsigned
char
*
leafCellsPtr
,
std
::
size_t
leafCellsSize
,
unsigned
char
*
containersPtr
,
std
::
size_t
containersSize
,
CudaKernelClass
*
kernel
,
cudaStream_t
currentStream
);
template
<
class
CellContainerClass
,
class
ParticleContainerGroupClass
,
class
ParticleGroupClass
,
class
CudaKernelClass
>
template
<
class
CellClass
,
class
CellContainerClass
,
class
ParticleContainerGroupClass
,
class
ParticleGroupClass
,
class
CudaKernelClass
>
void
FCuda__upwardPassCallback
(
unsigned
char
*
currentCellsPtr
,
std
::
size_t
currentCellsSize
,
unsigned
char
*
subCellGroupsPtr
[
9
],
std
::
size_t
subCellGroupsSize
[
9
],
int
nbSubCellGroups
,
int
idxLevel
,
CudaKernelClass
*
kernel
,
cudaStream_t
currentStream
);
template
<
class
CellContainerClass
,
class
ParticleContainerGroupClass
,
class
ParticleGroupClass
,
class
CudaKernelClass
>
template
<
class
CellClass
,
class
CellContainerClass
,
class
ParticleContainerGroupClass
,
class
ParticleGroupClass
,
class
CudaKernelClass
>
void
FCuda__transferInoutPassCallbackMpi
(
unsigned
char
*
currentCellsPtr
,
std
::
size_t
currentCellsSize
,
unsigned
char
*
externalCellsPtr
,
std
::
size_t
externalCellsSize
,
int
idxLevel
,
const
OutOfBlockInteraction
*
outsideInteractions
,
int
nbOutsideInteractions
,
CudaKernelClass
*
kernel
,
cudaStream_t
currentStream
);
template
<
class
CellContainerClass
,
class
ParticleContainerGroupClass
,
class
ParticleGroupClass
,
class
CudaKernelClass
>
template
<
class
CellClass
,
class
CellContainerClass
,
class
ParticleContainerGroupClass
,
class
ParticleGroupClass
,
class
CudaKernelClass
>
void
FCuda__transferInPassCallback
(
unsigned
char
*
currentCellsPtr
,
std
::
size_t
currentCellsSize
,
int
idxLevel
,
CudaKernelClass
*
kernel
,
cudaStream_t
currentStream
);
template
<
class
CellContainerClass
,
class
ParticleContainerGroupClass
,
class
ParticleGroupClass
,
class
CudaKernelClass
>
template
<
class
CellClass
,
class
CellContainerClass
,
class
ParticleContainerGroupClass
,
class
ParticleGroupClass
,
class
CudaKernelClass
>
void
FCuda__transferInoutPassCallback
(
unsigned
char
*
currentCellsPtr
,
std
::
size_t
currentCellsSize
,
unsigned
char
*
externalCellsPtr
,
std
::
size_t
externalCellsSize
,
int
idxLevel
,
const
OutOfBlockInteraction
*
outsideInteractions
,
int
nbOutsideInteractions
,
CudaKernelClass
*
kernel
,
cudaStream_t
currentStream
);
template
<
class
CellContainerClass
,
class
ParticleContainerGroupClass
,
class
ParticleGroupClass
,
class
CudaKernelClass
>
template
<
class
CellClass
,
class
CellContainerClass
,
class
ParticleContainerGroupClass
,
class
ParticleGroupClass
,
class
CudaKernelClass
>
void
FCuda__downardPassCallback
(
unsigned
char
*
currentCellsPtr
,
std
::
size_t
currentCellsSize
,
unsigned
char
*
subCellGroupsPtr
[
9
],
std
::
size_t
subCellGroupsSize
[
9
],
int
nbSubCellGroups
,
int
idxLevel
,
CudaKernelClass
*
kernel
,
cudaStream_t
currentStream
);
template
<
class
CellContainerClass
,
class
ParticleContainerGroupClass
,
class
ParticleGroupClass
,
class
CudaKernelClass
>
template
<
class
CellClass
,
class
CellContainerClass
,
class
ParticleContainerGroupClass
,
class
ParticleGroupClass
,
class
CudaKernelClass
>
void
FCuda__directInoutPassCallbackMpi
(
unsigned
char
*
containersPtr
,
std
::
size_t
containersSize
,
unsigned
char
*
externalContainersPtr
,
std
::
size_t
externalContainersSize
,
const
OutOfBlockInteraction
*
outsideInteractions
,
int
nbOutsideInteractions
,
const
int
treeHeight
,
CudaKernelClass
*
kernel
,
cudaStream_t
currentStream
);
template
<
class
CellContainerClass
,
class
ParticleContainerGroupClass
,
class
ParticleGroupClass
,
class
CudaKernelClass
>
template
<
class
CellClass
,
class
CellContainerClass
,
class
ParticleContainerGroupClass
,
class
ParticleGroupClass
,
class
CudaKernelClass
>
void
FCuda__directInPassCallback
(
unsigned
char
*
containersPtr
,
std
::
size_t
containersSize
,
const
int
treeHeight
,
CudaKernelClass
*
kernel
,
cudaStream_t
currentStream
);
template
<
class
CellContainerClass
,
class
ParticleContainerGroupClass
,
class
ParticleGroupClass
,
class
CudaKernelClass
>
template
<
class
CellClass
,
class
CellContainerClass
,
class
ParticleContainerGroupClass
,
class
ParticleGroupClass
,
class
CudaKernelClass
>
void
FCuda__directInoutPassCallback
(
unsigned
char
*
containersPtr
,
std
::
size_t
containersSize
,
unsigned
char
*
externalContainersPtr
,
std
::
size_t
externalContainersSize
,
const
OutOfBlockInteraction
*
outsideInteractions
,
int
nbOutsideInteractions
,
const
int
treeHeight
,
CudaKernelClass
*
kernel
,
cudaStream_t
currentStream
);
template
<
class
CellContainerClass
,
class
ParticleContainerGroupClass
,
class
ParticleGroupClass
,
class
CudaKernelClass
>
template
<
class
CellClass
,
class
CellContainerClass
,
class
ParticleContainerGroupClass
,
class
ParticleGroupClass
,
class
CudaKernelClass
>
void
FCuda__mergePassCallback
(
unsigned
char
*
leafCellsPtr
,
std
::
size_t
leafCellsSize
,
unsigned
char
*
containersPtr
,
std
::
size_t
containersSize
,
CudaKernelClass
*
kernel
,
cudaStream_t
currentStream
);
...
...
Src/GroupTree/Cuda/FCudaEmptyCell.hpp
0 → 100644
View file @
1f77bf7c
#ifndef FCUDAEMPTYCELL_HPP
#define FCUDAEMPTYCELL_HPP
#include
"../../Utils/FGlobal.hpp"
#include
"../../Containers/FTreeCoordinate.hpp"
#include
"../FStarPUDefaultAlign.hpp"
/**
 * Plain-data cell used as the default CellClass of the CUDA empty kernel.
 * It only stores a Morton index and three integer coordinates; the struct
 * is aligned on FStarPUDefaultAlign::StructAlign.
 */
struct alignas(FStarPUDefaultAlign::StructAlign) FCudaEmptyCell {
    /** Morton index stored for this cell */
    MortonIndex mortonIndex;
    /** Three integer coordinates stored for this cell */
    int coordinates[3];
};
#endif // FCUDAEMPTYCELL_HPP
Src/GroupTree/Cuda/FCudaEmptyKernel.hpp
View file @
1f77bf7c
...
...
@@ -4,28 +4,29 @@
#include
"FCudaGlobal.hpp"
#include
"FCudaGroupAttachedLeaf.hpp"
#include
"FCudaEmptyCell.hpp"
/**
* This class defines what should be a Cuda kernel.
*/
template
<
class
ContainerClass
=
FCudaGroupAttachedLeaf
<
0
,
int
>
>
template
<
class
CellClass
=
FCudaEmptyCell
,
class
ContainerClass
=
FCudaGroupAttachedLeaf
<
0
,
int
>
>
class
FCudaEmptyKernel
{
protected:
public:
__device__
void
P2M
(
unsigned
char
*
const
/*pole*/
,
const
ContainerClass
*
const
/*particles*/
)
{
__device__
void
P2M
(
CellClass
*
/*pole*/
,
const
ContainerClass
*
const
/*particles*/
)
{
}
__device__
void
M2M
(
unsigned
char
*
const
/*pole*/
,
const
unsigned
char
*
const
*
const
/*child*/
,
const
int
/*level*/
)
{
__device__
void
M2M
(
CellClass
*
/*pole*/
,
const
CellClass
*
/*child*/
[
8
]
,
const
int
/*level*/
)
{
}
__device__
void
M2L
(
unsigned
char
*
const
/*pole*/
,
const
unsigned
char
*
/*distantNeighbors*/
[
343
],
__device__
void
M2L
(
CellClass
*
/*pole*/
,
const
CellClass
*
/*distantNeighbors*/
[
343
],
const
int
/*size*/
,
const
int
/*level*/
)
{
}
__device__
void
L2L
(
const
unsigned
char
*
const
/*local*/
,
unsigned
char
*
*
const
/*child*/
,
const
int
/*level*/
)
{
__device__
void
L2L
(
const
CellClass
*
/*local*/
,
CellClass
*
/*child*/
[
8
]
,
const
int
/*level*/
)
{
}
__device__
void
L2P
(
const
unsigned
char
*
const
/*local*/
,
ContainerClass
*
const
/*particles*/
){
__device__
void
L2P
(
const
CellClass
*
/*local*/
,
ContainerClass
*
const
/*particles*/
){
}
__device__
void
P2P
(
const
int3
&
,
...
...
@@ -38,16 +39,6 @@ public:
ContainerClass
*
const
/*directNeighborsParticles*/
[
27
],
const
int
){
}
__device__
MortonIndex
getMortonIndex
(
const
unsigned
char
*
/*cell*/
)
const
{
return
0
;
}
__device__
int3
getCoordinate
(
const
unsigned
char
*
/*cell*/
)
const
{
int3
coord
;
coord
.
x
=
coord
.
y
=
coord
.
z
=
0
;
return
coord
;
}
__host__
static
FCudaEmptyKernel
*
InitKernelKernel
(
void
*
){
return
nullptr
;
}
...
...
Src/GroupTree/Cuda/FCudaGroupOfCells.hpp
View file @
1f77bf7c
...
...
@@ -9,7 +9,7 @@
/**
* @brief The FCudaGroupOfCells class manages the cells in block allocation.
*/
template
<
c
onst
size_t
CellClass
Size
>
template
<
c
lass
CellClass
>
class
FCudaGroupOfCells
{
/** One header is allocated at the beginning of each block */
struct
alignas
(
FStarPUDefaultAlign
::
StructAlign
)
BlockHeader
{
...
...
@@ -108,14 +108,14 @@ public:
}
/** Return the address of the cell if it exists (or NULL) */
__device__
unsigned
char
*
getCell
(
const
MortonIndex
inIndex
){
if
(
exists
(
inIndex
)
)
return
&
blockCells
[
CellClass
Size
*
blockIndexesTable
[
inIndex
-
blockHeader
->
startingIndex
]];
__device__
CellClass
*
getCell
(
const
MortonIndex
inIndex
){
if
(
exists
(
inIndex
)
)
return
(
CellClass
*
)(
&
blockCells
[
sizeof
(
CellClass
)
*
blockIndexesTable
[
inIndex
-
blockHeader
->
startingIndex
]]
)
;
else
return
nullptr
;
}
/** Return the address of the cell if it exists (or NULL) */
__device__
const
unsigned
char
*
getCell
(
const
MortonIndex
inIndex
)
const
{
if
(
exists
(
inIndex
)
)
return
&
blockCells
[
CellClass
Size
*
blockIndexesTable
[
inIndex
-
blockHeader
->
startingIndex
]];
__device__
const
CellClass
*
getCell
(
const
MortonIndex
inIndex
)
const
{
if
(
exists
(
inIndex
)
)
return
(
CellClass
*
)(
&
blockCells
[
sizeof
(
CellClass
)
*
blockIndexesTable
[
inIndex
-
blockHeader
->
startingIndex
]]
)
;
else
return
nullptr
;
}
};
...
...
Src/GroupTree/Cuda/FCudaTestKernels.hpp
View file @
1f77bf7c
...
...
@@ -7,126 +7,61 @@
// We need to describe this cell
#include
"../../Components/FTestCell.hpp"
class
FTestCellCudaDescriptor
{
FTestCell
*
ptr
;
public:
__device__
FTestCellCudaDescriptor
(
unsigned
char
*
inPtr
)
:
ptr
(
reinterpret_cast
<
FTestCell
*>
(
inPtr
)){
}
__device__
long
long
int
&
dataUp
(){
return
ptr
->
dataUp
;
}
__device__
long
long
int
&
dataDown
(){
return
ptr
->
dataDown
;
}
__device__
MortonIndex
getMortonIndex
()
const
{
return
ptr
->
mortonIndex
;
}
__device__
int3
getCoordinate
()
const
{
const
int
*
coordinate
=
(
const
int
*
)
&
ptr
->
coordinate
;
int3
coord
;
coord
.
x
=
coordinate
[
0
];
coord
.
y
=
coordinate
[
1
];
coord
.
z
=
coordinate
[
2
];
return
coord
;
}
};
class
FTestCellCudaConstDescriptor
{
const
FTestCell
*
ptr
;
public:
__device__
FTestCellCudaConstDescriptor
(
const
unsigned
char
*
inPtr
)
:
ptr
(
reinterpret_cast
<
const
FTestCell
*>
(
inPtr
)){
}
__device__
const
long
long
int
&
dataUp
()
const
{
return
ptr
->
dataUp
;
}
__device__
const
long
long
int
&
dataDown
()
const
{
return
ptr
->
dataDown
;
}
__device__
MortonIndex
getMortonIndex
()
const
{
return
ptr
->
mortonIndex
;
}
__device__
int3
getCoordinate
()
const
{
const
int
*
coordinate
=
(
const
int
*
)
&
ptr
->
coordinate
;
int3
coord
;
coord
.
x
=
coordinate
[
0
];
coord
.
y
=
coordinate
[
1
];
coord
.
z
=
coordinate
[
2
];
return
coord
;
}
};
template
<
class
ContainerClass
>
template
<
class
CellClass
,
class
ContainerClass
>
class
FTestCudaKernels
{
public:
/** Before upward */
__device__
void
P2M
(
unsigned
char
*
const
pole
,
const
ContainerClass
*
const
particles
)
{
__device__
void
P2M
(
CellClass
*
pole
,
const
ContainerClass
*
const
particles
)
{
// the pole represents all particles under
if
(
threadIdx
.
x
==
0
){
FTestCellCudaDescriptor
cell
(
pole
);
cell
.
dataUp
()
+=
particles
->
getNbParticles
();
pole
->
dataUp
+=
particles
->
getNbParticles
();
}
}
/** During upward */
__device__
void
M2M
(
unsigned
char
*
const
pole
,
const
unsigned
char
*
const
*
const
child
,
const
int
/*level*/
)
{
__device__
void
M2M
(
CellClass
*
pole
,
const
CellClass
*
child
[
8
]
,
const
int
/*level*/
)
{
if
(
threadIdx
.
x
==
0
)
{
FTestCellCudaDescriptor
cell
(
pole
);
// A parent represents the sum of its children
for
(
int
idx
=
0
;
idx
<
8
;
++
idx
){
if
(
child
[
idx
]){
FTestCellCudaConstDescriptor
childCell
(
child
[
idx
]);
cell
.
dataUp
()
+=
childCell
.
dataUp
();
pole
->
dataUp
+=
child
[
idx
]
->
dataUp
;
}
}
}
}
/** Before Downward */
__device__
void
M2L
(
unsigned
char
*
const
local
,
const
unsigned
char
*
distantNeighbors
[
343
],
const
int
/*size*/
,
const
int
/*level*/
)
{
__device__
void
M2L
(
CellClass
*
local
,
const
CellClass
*
distantNeighbors
[
343
],
const
int
/*size*/
,
const
int
/*level*/
)
{
if
(
threadIdx
.
x
==
0
)
{
FTestCellCudaDescriptor
cell
(
local
);
// The pole is impacted by what the other poles represent
for
(
int
idx
=
0
;
idx
<
343
;
++
idx
){
if
(
distantNeighbors
[
idx
]){
FTestCellCudaConstDescriptor
interCell
(
distantNeighbors
[
idx
]);
cell
.
dataDown
()
+=
interCell
.
dataUp
();
local
->
dataDown
+=
distantNeighbors
[
idx
]
->
dataUp
;
}
}
}
}
/** During Downward */
__device__
void
L2L
(
const
unsigned
char
*
const
local
,
unsigned
char
**
const
child
,
const
int
/*level*/
)
{
__device__
void
L2L
(
const
CellClass
*
local
,
CellClass
*
child
[
8
]
,
const
int
/*level*/
)
{
if
(
threadIdx
.
x
==
0
)
{
FTestCellCudaConstDescriptor
cell
(
local
);
// Each child is impacted by the father
for
(
int
idx
=
0
;
idx
<
8
;
++
idx
){
if
(
child
[
idx
]){
FTestCellCudaDescriptor
cellChild
(
child
[
idx
]);
cellChild
.
dataDown
()
+=
cell
.
dataDown
();
child
[
idx
]
->
dataDown
+=
local
->
dataDown
;
}
}
}
}
/** After Downward */
__device__
void
L2P
(
const
unsigned
char
*
const
local
,
ContainerClass
*
const
particles
){
__device__
void
L2P
(
const
CellClass
*
local
,
ContainerClass
*
const
particles
){
if
(
threadIdx
.
x
==
0
)
{
FTestCellCudaConstDescriptor
cell
(
local
);
// The particles are impacted by the parent cell
long
long
int
*
const
particlesAttributes
=
particles
->
template
getAttribute
<
0
>();
for
(
int
idxPart
=
0
;
idxPart
<
particles
->
getNbParticles
()
;
++
idxPart
){
particlesAttributes
[
idxPart
]
+=
cell
.
dataDown
()
;
particlesAttributes
[
idxPart
]
+=
local
->
dataDown
;
}
}
}
...
...
@@ -175,16 +110,6 @@ public:
}
}
__device__
MortonIndex
getMortonIndex
(
const
unsigned
char
*
cell
)
const
{
FTestCellCudaConstDescriptor
cellAccess
(
cell
);
return
cellAccess
.
getMortonIndex
();
}
__device__
int3
getCoordinate
(
const
unsigned
char
*
cell
)
const
{
FTestCellCudaConstDescriptor
cellAccess
(
cell
);
return
cellAccess
.
getCoordinate
();
}
__host__
static
FTestCudaKernels
*
InitKernelKernel
(
void
*
){
return
nullptr
;
}
...
...
Src/GroupTree/Cuda/FCudaTreeCoordinate.hpp
View file @
1f77bf7c
...
...
@@ -7,6 +7,14 @@
class
FCudaTreeCoordinate
{
public:
/**
 * Pack a three-element integer array into an int3:
 * x, y and z receive coordinate[0], coordinate[1] and coordinate[2].
 */
__device__ static int3 ConvertCoordinate(const int coordinate[3]) {
    // int3 is a plain aggregate of {x, y, z}, so it can be brace-initialised.
    int3 converted = { coordinate[0], coordinate[1], coordinate[2] };
    return converted;
}
__device__
static
int3
GetPositionFromMorton
(
MortonIndex
inIndex
,
const
int
inLevel
){
MortonIndex
mask
=
0x1LL
;
...
...
Src/GroupTree/FGroupTaskStarpuAlgorithm.hpp
View file @
1f77bf7c
...
...
@@ -30,6 +30,7 @@
#include
"Cuda/FCudaGroupAttachedLeaf.hpp"
#include
"Cuda/FCudaGroupOfParticles.hpp"
#include
"Cuda/FCudaGroupOfCells.hpp"
#include
"Cuda/FCudaEmptyCell.hpp"
#endif
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
#include
"FStarPUOpenClWrapper.hpp"
...
...
@@ -39,7 +40,7 @@
template
<
class
OctreeClass
,
class
CellContainerClass
,
class
CellClass
,
class
KernelClass
,
class
ParticleGroupClass
,
class
ParticleContainerClass
#ifdef ScalFMM_ENABLE_CUDA_KERNEL
,
class
CudaCellContainerClass
=
FCudaGroupOfCells
<
0
>,
class
CudaParticleGroupClass
=
FCudaGroupOfParticles
<
0
,
int
>
,
class
CudaParticleContainerClass
=
FCudaGroupAttachedLeaf
<
0
,
int
>
,
,
class
CudaCellClass
=
FCudaEmptyCell
,
class
CudaCellContainerClass
=
FCudaGroupOfCells
<
FCudaEmptyCell
>,
class
CudaParticleGroupClass
=
FCudaGroupOfParticles
<
0
,
int
>
,
class
CudaParticleContainerClass
=
FCudaGroupAttachedLeaf
<
0
,
int
>
,
class
CudaKernelClass
=
FCudaEmptyKernel
<>
#endif
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
...
...
@@ -50,7 +51,7 @@ class FGroupTaskStarPUAlgorithm {
protected:
typedef
FGroupTaskStarPUAlgorithm
<
OctreeClass
,
CellContainerClass
,
CellClass
,
KernelClass
,
ParticleGroupClass
,
ParticleContainerClass
#ifdef ScalFMM_ENABLE_CUDA_KERNEL
,
CudaCellContainerClass
,
CudaParticleGroupClass
,
CudaParticleContainerClass
,
CudaKernelClass
,
CudaCellClass
,
CudaCellContainerClass
,
CudaParticleGroupClass
,
CudaParticleContainerClass
,
CudaKernelClass
#endif
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
,
OpenCLDeviceWrapperClass
...
...
@@ -90,7 +91,7 @@ protected:
StarPUCpuWrapperClass
cpuWrapper
;
#endif
#ifdef ScalFMM_ENABLE_CUDA_KERNEL
typedef
FStarPUCudaWrapper
<
KernelClass
,
CudaCellContainerClass
,
CudaParticleGroupClass
,
CudaParticleContainerClass
,
CudaKernelClass
>
StarPUCudaWrapperClass
;
typedef
FStarPUCudaWrapper
<
KernelClass
,
CudaCellClass
,
CudaCellContainerClass
,
CudaParticleGroupClass
,
CudaParticleContainerClass
,
CudaKernelClass
>
StarPUCudaWrapperClass
;
StarPUCudaWrapperClass
cudaWrapper
;
#endif
#ifdef ScalFMM_ENABLE_OPENCL_KERNEL
...
...
Src/GroupTree/FStarPUCudaWrapper.hpp
View file @
1f77bf7c
...
...
@@ -33,12 +33,12 @@
#include
"FStarPUUtils.hpp"
template
<
class
KernelClass
,
class
CudaCellGroupClass
,
template
<
class
KernelClass
,
class
CellClass
,
class
CudaCellGroupClass
,
class
CudaParticleGroupClass
,
class
CudaParticleContainerClass
,
class
CudaKernelClass
>
class
FStarPUCudaWrapper
{
protected:
typedef
FStarPUCudaWrapper
<
KernelClass
,
CudaCellGroupClass
,
CudaParticleGroupClass
,
CudaParticleContainerClass
,
CudaKernelClass
>
ThisClass
;
typedef
FStarPUCudaWrapper
<
KernelClass
,
CellClass
,
CudaCellGroupClass
,
CudaParticleGroupClass
,
CudaParticleContainerClass
,
CudaKernelClass
>
ThisClass
;
template
<
class
OtherBlockClass
>
struct
BlockInteractions
{
...
...
@@ -81,7 +81,7 @@ public:
CudaKernelClass
*
kernel
=
worker
->
get
<
ThisClass
>
(
FSTARPU_CPU_IDX
)
->
kernels
[
starpu_worker_get_id
()];
FCuda__bottomPassCallback
<
CudaCellGroupClass
,
CudaParticleGroupClass
,
CudaParticleContainerClass
,
CudaKernelClass
>
((
unsigned
char
*
)
STARPU_VARIABLE_GET_PTR
(
buffers
[
0
]),
FCuda__bottomPassCallback
<
CellClass
,
CudaCellGroupClass
,
CudaParticleGroupClass
,
CudaParticleContainerClass
,
CudaKernelClass
>
((
unsigned
char
*
)
STARPU_VARIABLE_GET_PTR
(
buffers
[
0
]),
STARPU_VARIABLE_GET_ELEMSIZE
(
buffers
[
0
]),
(
unsigned
char
*
)
STARPU_VARIABLE_GET_PTR
(
buffers
[
1
]),
STARPU_VARIABLE_GET_ELEMSIZE
(
buffers
[
1
]),
...
...
@@ -112,7 +112,7 @@ public:
CudaKernelClass
*
kernel
=
worker
->
get
<
ThisClass
>
(
FSTARPU_CPU_IDX
)
->
kernels
[
starpu_worker_get_id
()];
FCuda__upwardPassCallback
<
CudaCellGroupClass
,
CudaParticleGroupClass
,
CudaParticleContainerClass
,
CudaKernelClass
>
((
unsigned
char
*
)
STARPU_VARIABLE_GET_PTR
(
buffers
[
0
]),
FCuda__upwardPassCallback
<
CellClass
,
CudaCellGroupClass
,
CudaParticleGroupClass
,
CudaParticleContainerClass
,
CudaKernelClass
>
((
unsigned
char
*
)
STARPU_VARIABLE_GET_PTR
(
buffers
[
0
]),
STARPU_VARIABLE_GET_ELEMSIZE
(
buffers
[
0
]),
subCellGroupsPtr
,
subCellGroupsSize
,
nbSubCellGroups
,
idxLevel
,
kernel
,
starpu_cuda_get_local_stream
());
...
...
@@ -135,7 +135,7 @@ public:
CudaKernelClass
*
kernel
=
worker
->
get
<
ThisClass
>
(
FSTARPU_CPU_IDX
)
->
kernels
[
starpu_worker_get_id
()];
FCuda__transferInoutPassCallbackMpi
<
CudaCellGroupClass
,
CudaParticleGroupClass
,
CudaParticleContainerClass
,
CudaKernelClass
>
((
unsigned
char
*
)
STARPU_VARIABLE_GET_PTR
(
buffers
[
0
]),
FCuda__transferInoutPassCallbackMpi
<
CellClass
,
CudaCellGroupClass
,
CudaParticleGroupClass
,
CudaParticleContainerClass
,
CudaKernelClass
>
((
unsigned
char
*
)
STARPU_VARIABLE_GET_PTR
(
buffers
[
0
]),
STARPU_VARIABLE_GET_ELEMSIZE
(
buffers
[
0
]),
(
unsigned
char
*
)
STARPU_VARIABLE_GET_PTR
(
buffers
[
1
]),
STARPU_VARIABLE_GET_ELEMSIZE
(
buffers
[
1
]),
...
...
@@ -158,7 +158,7 @@ public:
CudaKernelClass
*
kernel
=
worker
->
get
<
ThisClass
>
(
FSTARPU_CPU_IDX
)
->
kernels
[
starpu_worker_get_id
()];
FCuda__transferInPassCallback
<
CudaCellGroupClass
,
CudaParticleGroupClass
,
CudaParticleContainerClass
,
CudaKernelClass
>
((
unsigned
char
*
)
STARPU_VARIABLE_GET_PTR
(
buffers
[
0
]),
FCuda__transferInPassCallback
<
CellClass
,
CudaCellGroupClass
,
CudaParticleGroupClass
,
CudaParticleContainerClass
,
CudaKernelClass
>
((
unsigned
char
*
)
STARPU_VARIABLE_GET_PTR
(
buffers
[
0
]),
STARPU_VARIABLE_GET_ELEMSIZE
(
buffers
[
0
]),
idxLevel
,
kernel
,
starpu_cuda_get_local_stream
());
}
...
...
@@ -179,7 +179,7 @@ public:
CudaKernelClass
*
kernel
=
worker
->
get
<
ThisClass
>
(
FSTARPU_CPU_IDX
)
->
kernels
[
starpu_worker_get_id
()];
FCuda__transferInoutPassCallback
<
CudaCellGroupClass
,
CudaParticleGroupClass
,
CudaParticleContainerClass
,
CudaKernelClass
>
((
unsigned
char
*
)
STARPU_VARIABLE_GET_PTR
(
buffers
[
0
]),
FCuda__transferInoutPassCallback
<
CellClass
,
CudaCellGroupClass
,
CudaParticleGroupClass
,
CudaParticleContainerClass
,
CudaKernelClass
>
((
unsigned
char
*
)
STARPU_VARIABLE_GET_PTR
(
buffers
[
0
]),
STARPU_VARIABLE_GET_ELEMSIZE
(
buffers
[
0
]),
(
unsigned
char
*
)
STARPU_VARIABLE_GET_PTR
(
buffers
[
1
]),
STARPU_VARIABLE_GET_ELEMSIZE
(
buffers
[
1
]),
...
...
@@ -210,7 +210,7 @@ public:
CudaKernelClass
*
kernel
=
worker
->
get
<
ThisClass
>
(
FSTARPU_CPU_IDX
)
->
kernels
[
starpu_worker_get_id
()];
FCuda__downardPassCallback
<
CudaCellGroupClass
,
CudaParticleGroupClass
,
CudaParticleContainerClass
,
CudaKernelClass
>
((
unsigned
char
*
)
STARPU_VARIABLE_GET_PTR
(
buffers
[
0
]),
FCuda__downardPassCallback
<
CellClass
,
CudaCellGroupClass
,
CudaParticleGroupClass
,
CudaParticleContainerClass
,
CudaKernelClass
>
((
unsigned
char
*
)
STARPU_VARIABLE_GET_PTR
(
buffers
[
0
]),
STARPU_VARIABLE_GET_ELEMSIZE
(
buffers
[
0
]),
subCellGroupsPtr
,
subCellGroupsSize
,
nbSubCellGroups
,
idxLevel
,
kernel
,
starpu_cuda_get_local_stream
());
...
...
@@ -232,7 +232,7 @@ public:
CudaKernelClass
*
kernel
=
worker
->
get
<
ThisClass
>
(
FSTARPU_CPU_IDX
)
->
kernels
[
starpu_worker_get_id
()];
FCuda__directInoutPassCallbackMpi
<
CudaCellGroupClass
,
CudaParticleGroupClass
,
CudaParticleContainerClass
,
CudaKernelClass
>
((
unsigned
char
*
)
STARPU_VARIABLE_GET_PTR
(
buffers
[
0
]),
FCuda__directInoutPassCallbackMpi
<
CellClass
,
CudaCellGroupClass
,
CudaParticleGroupClass
,
CudaParticleContainerClass
,
CudaKernelClass
>
((
unsigned
char
*
)
STARPU_VARIABLE_GET_PTR
(
buffers
[
0
]),
STARPU_VARIABLE_GET_ELEMSIZE
(
buffers
[
0
]),
(
unsigned
char
*
)
STARPU_VARIABLE_GET_PTR
(
buffers
[
1
]),
STARPU_VARIABLE_GET_ELEMSIZE
(
buffers
[
1
]),
...
...
@@ -252,7 +252,7 @@ public:
starpu_codelet_unpack_args
(
cl_arg
,
&
worker
);
CudaKernelClass
*
kernel
=
worker
->
get
<
ThisClass
>
(
FSTARPU_CPU_IDX
)
->
kernels
[
starpu_worker_get_id
()];
FCuda__directInPassCallback
<
CudaCellGroupClass
,
CudaParticleGroupClass
,
CudaParticleContainerClass
,
CudaKernelClass
>
((
unsigned
char
*
)
STARPU_VARIABLE_GET_PTR
(
buffers
[
0
]),
FCuda__directInPassCallback
<
CellClass
,
CudaCellGroupClass
,
CudaParticleGroupClass
,
CudaParticleContainerClass
,
CudaKernelClass
>
((
unsigned
char
*
)
STARPU_VARIABLE_GET_PTR
(
buffers
[
0
]),
STARPU_VARIABLE_GET_ELEMSIZE
(
buffers
[
0
]),
worker
->
get
<
ThisClass
>
(
FSTARPU_CPU_IDX
)
->
treeHeight
,
kernel
,
starpu_cuda_get_local_stream
());
}
...
...
@@ -269,7 +269,7 @@ public:
CudaKernelClass
*
kernel
=
worker
->
get
<
ThisClass
>
(
FSTARPU_CPU_IDX
)
->
kernels
[
starpu_worker_get_id
()];
FCuda__directInoutPassCallback
<
CudaCellGroupClass
,
CudaParticleGroupClass
,
CudaParticleContainerClass
,
CudaKernelClass
>
((
unsigned
char
*
)
STARPU_VARIABLE_GET_PTR
(
buffers
[
0
]),
FCuda__directInoutPassCallback
<
CellClass
,
CudaCellGroupClass
,
CudaParticleGroupClass
,
CudaParticleContainerClass
,
CudaKernelClass
>
((
unsigned
char
*
)
STARPU_VARIABLE_GET_PTR
(
buffers
[
0
]),
STARPU_VARIABLE_GET_ELEMSIZE
(
buffers
[
0
]),
(
unsigned
char
*
)
STARPU_VARIABLE_GET_PTR
(
buffers
[
1
]),
STARPU_VARIABLE_GET_ELEMSIZE
(
buffers
[
1
]),
...
...
@@ -293,7 +293,7 @@ public:
CudaKernelClass
*
kernel
=
worker
->
get
<
ThisClass
>
(
FSTARPU_CPU_IDX
)
->
kernels
[
starpu_worker_get_id
()];
FCuda__mergePassCallback
<
CudaCellGroupClass
,
CudaParticleGroupClass
,
CudaParticleContainerClass
,
CudaKernelClass
>
((
unsigned
char
*
)
STARPU_VARIABLE_GET_PTR
(
buffers
[
0
]),
FCuda__mergePassCallback
<
CellClass
,
CudaCellGroupClass
,
CudaParticleGroupClass
,
CudaParticleContainerClass
,
CudaKernelClass
>
((
unsigned
char
*
)
STARPU_VARIABLE_GET_PTR
(
buffers
[
0
]),
STARPU_VARIABLE_GET_ELEMSIZE
(
buffers
[
0
]),
(
unsigned
char
*
)
STARPU_VARIABLE_GET_PTR
(
buffers
[
1
]),
STARPU_VARIABLE_GET_ELEMSIZE
(
buffers
[
1
]),
...
...
Src/GroupTree/FTestCellPOD.hpp
0 → 100644
View file @
1f77bf7c
#ifndef FTESTCELLPOD_HPP
#define FTESTCELLPOD_HPP
#include
"../Utils/FGlobal.hpp"
#include
"../Containers/FTreeCoordinate.hpp"
#include
"FStarPUDefaultAlign.hpp"
/**
 * Raw data carried by a test cell: a Morton index, three integer
 * coordinates and the two counters used by the upward and downward passes.
 * The struct is aligned on FStarPUDefaultAlign::StructAlign.
 */
struct alignas(FStarPUDefaultAlign::StructAlign) FTestCellPODCore {
    MortonIndex mortonIndex;
    int coordinates[3];
    /** Value accumulated during the upward pass */
    long long int dataUp;
    /** Value accumulated during the downward pass */
    long long int dataDown;
};
class
alignas
(
FStarPUDefaultAlign
::
StructAlign
)
FTestCellPOD
{
protected:
FTestCellPODCore
data
;
public:
/** Build a cell whose Morton index, coordinates and up/down data are all zero */
FTestCellPOD() {
    for (int idxDim = 0; idxDim < 3; ++idxDim) {
        data.coordinates[idxDim] = 0;
    }
    data.dataDown = 0;
    data.dataUp = 0;
    data.mortonIndex = 0;
}
/** To get the morton index */
MortonIndex getMortonIndex() const {
    // Read straight from the embedded POD core.
    return this->data.mortonIndex;
}
/** To set the morton index */
void
setMortonIndex
(
const
MortonIndex
inMortonIndex
)
{
data
.
mortonIndex
=
inMortonIndex
;
}
/** To get the position */
FTreeCoordinate getCoordinate() const {
    // The three stored integers are handed to FTreeCoordinate in index order.
    const int* const xyz = data.coordinates;
    return FTreeCoordinate(xyz[0], xyz[1], xyz[2]);
}
/** To set the position */
void setCoordinate(const FTreeCoordinate& inCoordinate) {
    // Copy X, Y and Z (queried in that order) into slots 0, 1 and 2.
    const int newX = inCoordinate.getX();
    const int newY = inCoordinate.getY();
    const int newZ = inCoordinate.getZ();
    data.coordinates[0] = newX;
    data.coordinates[1] = newY;
    data.coordinates[2] = newZ;
}
/** To set the position from 3 integer coordinates */
void setCoordinate(const int inX, const int inY, const int inZ) {
    // Slot 0 receives inX, slot 1 inY and slot 2 inZ.
    const int xyz[3] = { inX, inY, inZ };
    for (int idxDim = 0; idxDim < 3; ++idxDim) {
        data.coordinates[idxDim] = xyz[idxDim];
    }
}
/** When doing the upward pass */
long
long
int
getDataUp
()
const
{
return
data
.
dataUp
;
}
/** When doing the upward pass */
void
setDataUp
(
const
long
long
int
inData
){
data
.
dataUp
=
inData
;
}
/** When doing the downward pass */
long
long
int
getDataDown
()
const
{
return
data
.
dataDown
;
}
/** When doing the downward pass */
void
setDataDown
(
const
long
long
int
inData
){
data
.
dataDown
=
inData
;
}
/** Make it like the beginning: reset dataUp and dataDown to 0 */
void resetToInitialState() {
    // Only the two pass counters are cleared; index and coordinates are kept.
    data.dataUp = 0;
    data.dataDown = 0;
}
/////////////////////////////////////////////////
/** Save the current cell in a buffer */
template <class BufferWriterClass>
void save(BufferWriterClass& buffer) const {
    // Write order: Morton index, the three coordinates, dataDown, then dataUp.
    buffer << data.mortonIndex;
    for (int idxDim = 0; idxDim < 3; ++idxDim) {
        buffer << data.coordinates[idxDim];
    }
    buffer << data.dataDown;
    buffer << data.dataUp;
}
/** Restore the current cell from a buffer */
template <class BufferReaderClass>
void restore(BufferReaderClass& buffer) {
    // Read order: Morton index, the three coordinates, dataDown, then dataUp.
    buffer >> data.mortonIndex;
    for (int idxDim = 0; idxDim < 3; ++idxDim) {
        buffer >> data.coordinates[idxDim];
    }
    buffer >> data.dataDown;
    buffer >> data.dataUp;
}
/**
 * Number of bytes covered by the fields written in save(): the Morton
 * index, the three coordinates, dataDown and dataUp.
 */
int getSavedSize() const {
    // sizeof(data.coordinates) covers the whole three-element array.
    const auto positionBytes = sizeof(data.mortonIndex) + sizeof(data.coordinates);
    const auto counterBytes = sizeof(data.dataDown) + sizeof(data.dataUp);
    return static_cast<int>(positionBytes + counterBytes);
}
/////////////////////////////////////////////////
/** Serialize only up data in a buffer */
template
<
class
BufferWriterClass
>
void
serializeUp
(
BufferWriterClass
&
buffer
)
const
{
buffer
<<
data
.
dataUp
;