solverstack / ScalFMM · Commits

Commit 527b7753
Authored Apr 12, 2016 by Berenger Bramas

Add extra test file to force P2P M2L on the GPU

Parent: 2b18ec60
Changes: 1
Tests/GroupTree/testBlockedUnifCudaBench.cpp (new file, 0 → 100644) @ 527b7753
// ==== CMAKE =====
// @FUSE_BLAS
// @FUSE_FFT
// @FUSE_STARPU
// @FUSE_CUDA
// ================
// Keep in private GIT

#include "../../Src/Utils/FGlobal.hpp"
#include "../../Src/GroupTree/Core/FGroupTree.hpp"
#include "../../Src/Components/FSimpleLeaf.hpp"
#include "../../Src/Containers/FVector.hpp"
#include "../../Src/Kernels/P2P/FP2PParticleContainer.hpp"
#include "Kernels/Interpolation/FInterpMatrixKernel.hpp"
#include "../../Src/Kernels/Uniform/FUnifKernel.hpp"
#include "../../Src/GroupTree/Uniform/FUnifCellPOD.hpp"
#include "../../Src/GroupTree/Uniform/FUnifCudaCellPOD.hpp"
#include "../../Src/Utils/FMath.hpp"
#include "../../Src/Utils/FMemUtils.hpp"
#include "../../Src/Utils/FParameters.hpp"
#include "../../Src/Files/FRandomLoader.hpp"
#include "../../Src/Files/FFmaGenericLoader.hpp"
#include "../../Src/GroupTree/Core/FGroupSeqAlgorithm.hpp"
#include "../../Src/GroupTree/Core/FGroupTaskAlgorithm.hpp"
#include "../../Src/GroupTree/Core/FGroupTaskStarpuAlgorithm.hpp"
#include "../../Src/GroupTree/StarPUUtils/FStarPUKernelCapacities.hpp"
#include "../../Src/GroupTree/Core/FP2PGroupParticleContainer.hpp"
#include "../../Src/GroupTree/Cuda/FCudaDeviceWrapper.hpp"
#include "../../Src/GroupTree/Cuda/FCudaEmptyCellSymb.hpp"
#include "../../Src/GroupTree/Cuda/FCudaGroupOfParticles.hpp"
#include "../../Src/GroupTree/Cuda/FCudaGroupOfCells.hpp"
#include "../../Src/GroupTree/Uniform/FUnifCudaSharedData.hpp"
#include "../../Src/Utils/FParameterNames.hpp"

#include <memory>
template <class FReal, int ORDER>
class FUnifCuda;

#define RANDOM_PARTICLES
template <class BaseClass>
class FStarPUP2PM2LCudaOnlyCapacities : public BaseClass, public FStarPUAbstractCapacities {
    // Everything not explicitly allowed below stays on the CPU workers
    bool check(const FStarPUTypes inPu) const override {
        return inPu == FSTARPU_CPU_IDX;
    }
public:
    using BaseClass::BaseClass;

    // P2P and M2L (and their extern/MPI variants) are forced onto the CUDA workers
    bool supportP2P(const FStarPUTypes inPu) const override {
        return inPu == FSTARPU_CUDA_IDX;
    }
    bool supportP2PExtern(const FStarPUTypes inPu) const override {
        return inPu == FSTARPU_CUDA_IDX;
    }
    bool supportP2PMpi(const FStarPUTypes inPu) const override {
        return inPu == FSTARPU_CUDA_IDX;
    }
    bool supportM2L(const FStarPUTypes inPu) const override {
        return inPu == FSTARPU_CUDA_IDX;
    }
    bool supportM2LExtern(const FStarPUTypes inPu) const override {
        return inPu == FSTARPU_CUDA_IDX;
    }
    bool supportM2LMpi(const FStarPUTypes inPu) const override {
        return inPu == FSTARPU_CUDA_IDX;
    }
};
const FParameterNames LocalOptionBlocSize {
    {"-bs"},
    "The size of the block of the blocked tree"
};
const FParameterNames LocalOptionValidate {
    {"-validation"},
    "To compare with direct computation"
};
template <template <class> class PUCapacities>
int mainCore(int argc, char* argv[]){
    // Initialize the types
    typedef double FReal;
    static const int ORDER = 5;
    typedef FInterpMatrixKernelR<FReal> MatrixKernelClass;

    typedef FUnifCellPODCore GroupCellSymbClass;
    typedef FUnifCellPODPole<FReal, ORDER>  GroupCellUpClass;
    typedef FUnifCellPODLocal<FReal, ORDER> GroupCellDownClass;
    typedef FUnifCellPOD<FReal, ORDER>      GroupCellClass;
    typedef FP2PGroupParticleContainer<FReal> GroupContainerClass;

    typedef FGroupTree<FReal, GroupCellClass, GroupCellSymbClass, GroupCellUpClass, GroupCellDownClass,
                       GroupContainerClass, 1, 4, FReal> GroupOctreeClass;

    typedef PUCapacities<FUnifKernel<FReal, GroupCellClass, GroupContainerClass, MatrixKernelClass, ORDER>> GroupKernelClass;

    typedef FStarPUCpuWrapper<typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass,
                              typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass> GroupCpuWrapper;

    typedef FStarPUCudaWrapper<GroupKernelClass, FBasicCellPOD,
                               FCudaUnifCellPODPole<FReal, ORDER>, FCudaUnifCellPODLocal<FReal, ORDER>,
                               FCudaGroupOfCells<FBasicCellPOD, FCudaUnifCellPODPole<FReal, ORDER>,
                                                 FCudaUnifCellPODLocal<FReal, ORDER>>,
                               FCudaGroupOfParticles<FReal, 1, 4, FReal>,
                               FCudaGroupAttachedLeaf<FReal, 1, 4, FReal>,
                               FUnifCuda<FReal, ORDER>> GroupCudaWrapper;

    typedef FGroupTaskStarPUAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupKernelClass,
                                      typename GroupOctreeClass::ParticleGroupClass,
                                      GroupCpuWrapper, GroupCudaWrapper> GroupAlgorithm;
    // Get params
    const int NbLevels  = FParameters::getValue(argc, argv, FParameterDefinitions::OctreeHeight.options, 5);
    const int groupSize = FParameters::getValue(argc, argv, LocalOptionBlocSize.options, 250);

    // Load the particles
#ifdef RANDOM_PARTICLES
    FRandomLoader<FReal> loader(FParameters::getValue(argc, argv, FParameterDefinitions::NbParticles.options, 2000),
                                1.0, FPoint<FReal>(0, 0, 0), 0);
#else
    const char* const filename = FParameters::getStr(argc, argv, FParameterDefinitions::InputFile.options,
                                                     "../Data/test20k.fma");
    FFmaGenericLoader<FReal> loader(filename);
#endif
    FAssertLF(loader.isOpen());

    FTic timer;
    FP2PParticleContainer<FReal> allParticles;
    for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
        FPoint<FReal> particlePosition;
        FReal physicalValue;
#ifdef RANDOM_PARTICLES
        physicalValue = 0.10;
        loader.fillParticle(&particlePosition);
#else
        loader.fillParticle(&particlePosition, &physicalValue);
#endif
        allParticles.push(particlePosition, physicalValue);
    }
    std::cout << "Particles loaded in " << timer.tacAndElapsed() << "s\n";
    // Put the data into the tree
    timer.tic();
    GroupOctreeClass groupedTree(NbLevels, loader.getBoxWidth(), loader.getCenterOfBox(), groupSize, &allParticles);
    groupedTree.printInfoBlocks();
    std::cout << "Tree created in " << timer.tacAndElapsed() << "s\n";

    // Run the algorithm
    const MatrixKernelClass MatrixKernel;
    GroupKernelClass groupkernel(NbLevels, loader.getBoxWidth(), loader.getCenterOfBox(), &MatrixKernel);
    GroupAlgorithm groupalgo(&groupedTree, &groupkernel);
    {
        typedef FUnifM2LHandler<FReal, ORDER, MatrixKernelClass::Type> M2LHandlerClass;
        const M2LHandlerClass M2LHandler(&MatrixKernel, NbLevels, loader.getBoxWidth(), 1);

        // Copy precomputed matrix to cuda
        FUnifCudaSharedData<FReal, ORDER> hostData;
        hostData.BoxWidth = loader.getBoxWidth();

        std::cout << "Copy precomputed tab of size " << hostData.ninteractions << " " << hostData.opt_rc << std::endl;

        for(int idxInter = 0 ; idxInter < hostData.ninteractions ; ++idxInter){
            for(int idx = 0 ; idx < hostData.opt_rc ; ++idx){
                hostData.FC[hostData.opt_rc * idxInter + idx].complex[0] = M2LHandler.getFc(idxInter, idx).getReal();
                hostData.FC[hostData.opt_rc * idxInter + idx].complex[1] = M2LHandler.getFc(idxInter, idx).getImag();
            }
        }

        std::cout << "Copy to GPU" << std::endl;
        groupalgo.forEachCudaWorker([&](void* cudaKernel){
            FUnifCudaFillObject(cudaKernel, hostData);
        });
        std::cout << "Done" << std::endl;
    }
    timer.tic();
    groupalgo.execute();
    std::cout << "Kernel executed in " << timer.tacAndElapsed() << "s\n";
    // Validate the result
    if(FParameters::existParameter(argc, argv, LocalOptionValidate.options)){
        FSize offsetParticles = 0;
        FReal* const allPhysicalValues = allParticles.getPhysicalValues();
        FReal* const allPosX = const_cast<FReal*>(allParticles.getPositions()[0]);
        FReal* const allPosY = const_cast<FReal*>(allParticles.getPositions()[1]);
        FReal* const allPosZ = const_cast<FReal*>(allParticles.getPositions()[2]);

        groupedTree.forEachCellLeaf<FP2PGroupParticleContainer<FReal> >(
                [&](GroupCellClass cellTarget, FP2PGroupParticleContainer<FReal>* leafTarget){
            const FReal* const physicalValues = leafTarget->getPhysicalValues();
            const FReal* const posX = leafTarget->getPositions()[0];
            const FReal* const posY = leafTarget->getPositions()[1];
            const FReal* const posZ = leafTarget->getPositions()[2];
            const FSize nbPartsInLeafTarget = leafTarget->getNbParticles();

            for(FSize idxPart = 0 ; idxPart < nbPartsInLeafTarget ; ++idxPart){
                allPhysicalValues[offsetParticles + idxPart] = physicalValues[idxPart];
                allPosX[offsetParticles + idxPart] = posX[idxPart];
                allPosY[offsetParticles + idxPart] = posY[idxPart];
                allPosZ[offsetParticles + idxPart] = posZ[idxPart];
            }

            offsetParticles += nbPartsInLeafTarget;
        });
        FAssertLF(offsetParticles == loader.getNumberOfParticles());

        FReal* const allDirectPotentials = allParticles.getPotentials();
        FReal* const allDirectforcesX = allParticles.getForcesX();
        FReal* const allDirectforcesY = allParticles.getForcesY();
        FReal* const allDirectforcesZ = allParticles.getForcesZ();

        for(int idxTgt = 0 ; idxTgt < offsetParticles ; ++idxTgt){
            for(int idxMutual = idxTgt + 1 ; idxMutual < offsetParticles ; ++idxMutual){
                FP2PR::MutualParticles(
                    allPosX[idxTgt], allPosY[idxTgt], allPosZ[idxTgt], allPhysicalValues[idxTgt],
                    &allDirectforcesX[idxTgt], &allDirectforcesY[idxTgt], &allDirectforcesZ[idxTgt],
                    &allDirectPotentials[idxTgt],
                    allPosX[idxMutual], allPosY[idxMutual], allPosZ[idxMutual], allPhysicalValues[idxMutual],
                    &allDirectforcesX[idxMutual], &allDirectforcesY[idxMutual], &allDirectforcesZ[idxMutual],
                    &allDirectPotentials[idxMutual]);
            }
        }

        FMath::FAccurater<FReal> potentialDiff;
        FMath::FAccurater<FReal> fx, fy, fz;
        offsetParticles = 0;
        groupedTree.forEachCellLeaf<FP2PGroupParticleContainer<FReal> >(
                [&](GroupCellClass cellTarget, FP2PGroupParticleContainer<FReal>* leafTarget){
            const FReal* const potentials = leafTarget->getPotentials();
            const FReal* const forcesX = leafTarget->getForcesX();
            const FReal* const forcesY = leafTarget->getForcesY();
            const FReal* const forcesZ = leafTarget->getForcesZ();
            const FSize nbPartsInLeafTarget = leafTarget->getNbParticles();

            for(int idxTgt = 0 ; idxTgt < nbPartsInLeafTarget ; ++idxTgt){
                potentialDiff.add(allDirectPotentials[idxTgt + offsetParticles], potentials[idxTgt]);
                fx.add(allDirectforcesX[idxTgt + offsetParticles], forcesX[idxTgt]);
                fy.add(allDirectforcesY[idxTgt + offsetParticles], forcesY[idxTgt]);
                fz.add(allDirectforcesZ[idxTgt + offsetParticles], forcesZ[idxTgt]);
            }

            offsetParticles += nbPartsInLeafTarget;
        });

        std::cout << "Error : Potential " << potentialDiff << "\n";
        std::cout << "Error : fx " << fx << "\n";
        std::cout << "Error : fy " << fy << "\n";
        std::cout << "Error : fz " << fz << "\n";
    }
    return 0;
}
int main(int argc, char* argv[]){
    const FParameterNames LocalOptionP2PM2LCudaOnly {
        {"-p2p-m2l-cuda-only"},
        "To compute the P2P and the M2L on CUDA only (the rest is still on the CPU)."
    };
    FHelpDescribeAndExit(argc, argv,
                         "Perform Lagrange Kernel based simulation with CUDA+StarPU.",
                         FParameterDefinitions::OctreeHeight,
#ifdef RANDOM_PARTICLES
                         FParameterDefinitions::NbParticles,
#else
                         FParameterDefinitions::InputFile,
#endif
                         FParameterDefinitions::NbThreads,
                         LocalOptionBlocSize,
                         LocalOptionValidate);

    if(FParameters::existParameter(argc, argv, LocalOptionP2PM2LCudaOnly.options)){
        std::cout << "/!\\ P2P and M2L only on GPU\n";
        return mainCore<FStarPUP2PM2LCudaOnlyCapacities>(argc, argv);
    }
    else{
        return mainCore<FStarPUCudaP2PM2LCapacities>(argc, argv);
    }
}
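
Editor's note: the capacity wrapper added in this commit steers StarPU scheduling by answering "which worker type may run this operator" for each FMM operator. Its effect is easiest to see against a mirror-image wrapper that pins the same operators to the CPU workers instead, which could serve as a baseline for comparison runs. The sketch below is not part of this commit; the class name FStarPUP2PM2LCpuOnlyCapacities is hypothetical, and it only reuses the FStarPUAbstractCapacities interface (check plus the supportP2P/supportM2L family) and the FStarPUTypes/FSTARPU_CPU_IDX constants already used in the file above.

    // Hypothetical CPU-only counterpart of FStarPUP2PM2LCudaOnlyCapacities (illustrative sketch,
    // not part of this commit): every operator, including P2P and M2L, stays on the CPU workers.
    template <class BaseClass>
    class FStarPUP2PM2LCpuOnlyCapacities : public BaseClass, public FStarPUAbstractCapacities {
        bool check(const FStarPUTypes inPu) const override {
            return inPu == FSTARPU_CPU_IDX;   // all remaining operators on the CPU
        }
    public:
        using BaseClass::BaseClass;
        bool supportP2P(const FStarPUTypes inPu) const override { return inPu == FSTARPU_CPU_IDX; }
        bool supportP2PExtern(const FStarPUTypes inPu) const override { return inPu == FSTARPU_CPU_IDX; }
        bool supportP2PMpi(const FStarPUTypes inPu) const override { return inPu == FSTARPU_CPU_IDX; }
        bool supportM2L(const FStarPUTypes inPu) const override { return inPu == FSTARPU_CPU_IDX; }
        bool supportM2LExtern(const FStarPUTypes inPu) const override { return inPu == FSTARPU_CPU_IDX; }
        bool supportM2LMpi(const FStarPUTypes inPu) const override { return inPu == FSTARPU_CPU_IDX; }
    };

    // It would plug into the same entry point as the wrappers above, e.g.:
    //     return mainCore<FStarPUP2PM2LCpuOnlyCapacities>(argc, argv);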