Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
ScalFMM
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
5
Issues
5
List
Boards
Labels
Service Desk
Milestones
Operations
Operations
Incidents
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
solverstack
ScalFMM
Commits
f5d468cc
Commit
f5d468cc
authored
Feb 20, 2015
by
Florent Pruvost
Browse files
Options
Browse Files
Download
Plain Diff
merge with origin/master
parents
36bc62de
beec09ae
Changes
41
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
41 changed files
with
2559 additions
and
591 deletions
+2559
-591
CMakeLists.txt
CMakeLists.txt
+28
-29
Src/Arranger/FAbstractMover.hpp
Src/Arranger/FAbstractMover.hpp
+1
-1
Src/Arranger/FOctreeArranger.hpp
Src/Arranger/FOctreeArranger.hpp
+23
-3
Src/Arranger/FParticleTypedIndexedMover.hpp
Src/Arranger/FParticleTypedIndexedMover.hpp
+77
-0
Src/CMakeLists.txt
Src/CMakeLists.txt
+24
-0
Src/Components/FBasicParticleContainer.hpp
Src/Components/FBasicParticleContainer.hpp
+10
-0
Src/Components/FTestCell.hpp
Src/Components/FTestCell.hpp
+6
-0
Src/Core/FFmmAlgorithmThreadTsm.hpp
Src/Core/FFmmAlgorithmThreadTsm.hpp
+5
-7
Src/GroupTree/Cuda/FCudaDeviceWrapper.cu
Src/GroupTree/Cuda/FCudaDeviceWrapper.cu
+665
-0
Src/GroupTree/Cuda/FCudaDeviceWrapper.hpp
Src/GroupTree/Cuda/FCudaDeviceWrapper.hpp
+67
-0
Src/GroupTree/Cuda/FCudaEmptyKernel.hpp
Src/GroupTree/Cuda/FCudaEmptyKernel.hpp
+63
-0
Src/GroupTree/Cuda/FCudaGlobal.hpp
Src/GroupTree/Cuda/FCudaGlobal.hpp
+15
-0
Src/GroupTree/Cuda/FCudaGroupAttachedLeaf.hpp
Src/GroupTree/Cuda/FCudaGroupAttachedLeaf.hpp
+155
-0
Src/GroupTree/Cuda/FCudaGroupOfCells.hpp
Src/GroupTree/Cuda/FCudaGroupOfCells.hpp
+122
-0
Src/GroupTree/Cuda/FCudaGroupOfParticles.hpp
Src/GroupTree/Cuda/FCudaGroupOfParticles.hpp
+166
-0
Src/GroupTree/Cuda/FCudaTestKernels.hpp
Src/GroupTree/Cuda/FCudaTestKernels.hpp
+153
-0
Src/GroupTree/Cuda/FCudaTreeCoordinate.hpp
Src/GroupTree/Cuda/FCudaTreeCoordinate.hpp
+167
-0
Src/GroupTree/FGroupOfCells.hpp
Src/GroupTree/FGroupOfCells.hpp
+24
-0
Src/GroupTree/FGroupSeqAlgorithm.hpp
Src/GroupTree/FGroupSeqAlgorithm.hpp
+2
-10
Src/GroupTree/FGroupTaskAlgorithm.hpp
Src/GroupTree/FGroupTaskAlgorithm.hpp
+2
-10
Src/GroupTree/FGroupTaskDepAlgorithm.hpp
Src/GroupTree/FGroupTaskDepAlgorithm.hpp
+2
-10
Src/GroupTree/FGroupTaskStarpuAlgorithm.hpp
Src/GroupTree/FGroupTaskStarpuAlgorithm.hpp
+56
-51
Src/GroupTree/FGroupTaskStarpuMpiAlgorithm.hpp
Src/GroupTree/FGroupTaskStarpuMpiAlgorithm.hpp
+66
-61
Src/GroupTree/FGroupTree.hpp
Src/GroupTree/FGroupTree.hpp
+2
-2
Src/GroupTree/FOutOfBlockInteraction.hpp
Src/GroupTree/FOutOfBlockInteraction.hpp
+17
-0
Src/GroupTree/FStarPUCpuWrapper.hpp
Src/GroupTree/FStarPUCpuWrapper.hpp
+6
-14
Src/GroupTree/FStarPUCudaWrapper.hpp
Src/GroupTree/FStarPUCudaWrapper.hpp
+118
-338
Src/GroupTree/FStarPUKernelCapacities.hpp
Src/GroupTree/FStarPUKernelCapacities.hpp
+39
-13
Src/GroupTree/FStarPUOpenClWrapper.hpp
Src/GroupTree/FStarPUOpenClWrapper.hpp
+6
-14
Src/GroupTree/FStarPUUtils.hpp
Src/GroupTree/FStarPUUtils.hpp
+22
-2
Src/Kernels/Chebyshev/FChebSymKernel.hpp
Src/Kernels/Chebyshev/FChebSymKernel.hpp
+1
-1
Src/Kernels/Chebyshev/FChebSymM2LHandler.hpp
Src/Kernels/Chebyshev/FChebSymM2LHandler.hpp
+4
-4
Src/Kernels/P2P/FP2PParticleContainer.hpp
Src/Kernels/P2P/FP2PParticleContainer.hpp
+1
-1
Src/Kernels/Rotation/FRotationCell.hpp
Src/Kernels/Rotation/FRotationCell.hpp
+1
-1
Src/ScalFmmConfig.h.cmake
Src/ScalFmmConfig.h.cmake
+2
-8
Src/Utils/FOffetOf.hpp
Src/Utils/FOffetOf.hpp
+9
-0
Src/Utils/FQuickSort.hpp
Src/Utils/FQuickSort.hpp
+4
-3
Tests/Utils/testOctreeRearrangeTsm.cpp
Tests/Utils/testOctreeRearrangeTsm.cpp
+251
-0
Tests/Utils/testTreeBuilderThread.cpp
Tests/Utils/testTreeBuilderThread.cpp
+0
-6
Tests/noDist/testBlockedWithCudaAlgorithm.cpp
Tests/noDist/testBlockedWithCudaAlgorithm.cpp
+176
-0
Utils/noDist/FmmAlgorithmTsm.cpp
Utils/noDist/FmmAlgorithmTsm.cpp
+1
-2
No files found.
CMakeLists.txt
View file @
f5d468cc
...
...
@@ -13,33 +13,15 @@ set(CMAKE_DISABLE_IN_SOURCE_BUILD ON)
#===========================================================================
project
(
ScalFMM C CXX
)
# directly make an error if in-source build
#if("${PROJECT_SOURCE_DIR}" STREQUAL "${PROJECT_BINARY_DIR}")
# message(FATAL_ERROR "In-source builds are not allowed.\n"
# "Please create a build directory first and execute cmake configuration from "
# "this directory. Example: mkdir build && cd build && cmake ..")
#endif()
#
# check if compiling into source directories
#
string(COMPARE EQUAL "${CMAKE_SOURCE_DIR}" "${CMAKE_BINARY_DIR}" insource)
#
if(insource)
#
message(FATAL_ERROR "${PROJECT_NAME} requires an out of source build. Goto ./Build and tapes cmake ../")
#
endif(insource)
string
(
COMPARE EQUAL
"
${
CMAKE_SOURCE_DIR
}
"
"
${
CMAKE_BINARY_DIR
}
"
insource
)
if
(
insource
)
message
(
FATAL_ERROR
"
${
PROJECT_NAME
}
requires an out of source build. Goto ./Build and tapes cmake ../"
)
endif
(
insource
)
set
(
ScalFMM_CMAKE_MODULE_PATH
${
CMAKE_SOURCE_DIR
}
/CMakeModules
)
# MPI option has to be set before project, cannot be changed in the cache!
#if( ScalFMM_USE_MPI )
# include(CMakeForceCompiler)
# CMAKE_FORCE_C_COMPILER(mpicc "MPI C Compiler")
# CMAKE_FORCE_CXX_COMPILER(mpicxx "MPI C++ Compiler")
# set(ScalFMM_USE_MPI ON CACHE BOOL "ScalFMM use MPI")
#else()
# message(STATUS "Remove CMake cache and run cmake .. -DScalFMM_USE_MPI=ON to enable MPI" )
#endif(ScalFMM_USE_MPI)
#===========================================================================
# Version Number
#===========================================================================
...
...
@@ -66,12 +48,6 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
endif
()
include
(
MorseInit
)
#
# Active language
# -----------------------
# enable_language(CXX)
#
#
# Options
option
(
ScalFMM_USE_MPI
"Set to ON to build ScaFMM with MPI"
OFF
)
...
...
@@ -411,6 +387,7 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
##################################################################
##################################################################
#
message
(
STATUS
"ScalFMM_USE_STARPU =
${
ScalFMM_USE_STARPU
}
"
)
if
(
ScalFMM_USE_STARPU
)
set
(
ScalFMM_STARPU_VERSION
"1.1"
CACHE STRING
"oldest STARPU version desired"
)
...
...
@@ -441,6 +418,26 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
include_directories
(
${
STARPU_INCLUDES
}
)
endif
()
option
(
ScalFMM_USE_CUDA
"Set to ON to use CUDA with StarPU"
OFF
)
message
(
STATUS
"ScalFMM_USE_CUDA =
${
ScalFMM_USE_CUDA
}
"
)
if
(
ScalFMM_USE_CUDA
)
execute_process
(
COMMAND nvcc --version ERROR_VARIABLE cuda_error_output OUTPUT_QUIET
)
if
(
cuda_error_output
)
message
(
FATAL_ERROR
"nvcc is needed with CUDA."
)
endif
()
if
(
NOT DEFINED CUSTOM_CUDA_FLAGS
)
set
(
CUSTOM_CUDA_FLAGS
"-std=c++11;-arch=sm_20"
CACHE STRING
"Set your CUDA flags, for example : -arch=sm_20;-ptxas-options=-v;-use_fast_math"
)
endif
()
# This is needed to remove backslash after space in ADD_CUSTOM_COMMAND
separate_arguments
(
CUSTOM_CUDA_FLAGS
)
message
(
STATUS
"CUSTOM_CUDA_FLAGS =
${
CUSTOM_CUDA_FLAGS
}
"
)
# Add libcudart and cuda.h
# link_directories($ENV{CUDA_LIB})
include_directories
(
$ENV{CUDA_INC}
)
set
(
SCALFMM_LIBRARIES
"
${
SCALFMM_LIBRARIES
}
; -L$ENV{CUDA_LIB}; -lcudart"
)
endif
()
message
(
STATUS
" STARPU_LIBRARIES =
${
STARPU_LIBRARIES
}
"
)
if
(
STARPU_INCLUDE_DIRS
)
message
(
STATUS
" STARPU_INCLUDES =
${
STARPU_INCLUDES
}
"
)
...
...
@@ -448,7 +445,8 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
endif
(
ScalFMM_USE_STARPU
)
list
(
APPEND FUSE_LIST
"STARPU"
)
#
list
(
APPEND FUSE_LIST
"CUDA"
)
##################################################################
# Use SSE #
##################################################################
...
...
@@ -479,6 +477,7 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
endif
(
${
COMPILE_SSE
}
)
endif
()
list
(
APPEND FUSE_LIST
"SSE"
)
##################################################################
# Use AVX #
##################################################################
...
...
Src/Arranger/FAbstractMover.hpp
View file @
f5d468cc
...
...
@@ -20,7 +20,7 @@ template<class OctreeClass,class ParticleClass>
class
FAbstractMover
{
public:
virtual
void
getParticlePosition
(
ParticleClass
*
lf
,
const
int
idxPart
,
FPoint
*
particlePos
)
=
0
;
virtual
void
removeFromLeafAndKeep
(
ParticleClass
*
lf
,
const
FPoint
&
particlePos
,
const
int
idxPart
)
=
0
;
virtual
void
removeFromLeafAndKeep
(
ParticleClass
*
lf
,
const
FPoint
&
particlePos
,
const
int
idxPart
,
FParticleType
type
)
=
0
;
virtual
void
insertAllParticles
(
OctreeClass
*
tree
)
=
0
;
};
...
...
Src/Arranger/FOctreeArranger.hpp
View file @
f5d468cc
...
...
@@ -74,6 +74,7 @@ public:
octreeIterator
.
gotoBottomLeft
();
do
{
const
MortonIndex
currentMortonIndex
=
octreeIterator
.
getCurrentGlobalIndex
();
//First we test sources
ContainerClass
*
particles
=
octreeIterator
.
getCurrentLeaf
()
->
getSrc
();
for
(
int
idxPart
=
0
;
idxPart
<
particles
->
getNbParticles
();
/*++idxPart*/
){
FPoint
currentPart
;
...
...
@@ -82,15 +83,33 @@ public:
const
MortonIndex
particuleIndex
=
tree
->
getMortonFromPosition
(
currentPart
);
if
(
particuleIndex
!=
currentMortonIndex
){
//Need to move this one
interface
->
removeFromLeafAndKeep
(
particles
,
currentPart
,
idxPart
);
interface
->
removeFromLeafAndKeep
(
particles
,
currentPart
,
idxPart
,
FParticleTypeSource
);
}
else
{
//Need to increment idx;
++
idxPart
;
}
}
//Then we test targets
if
(
octreeIterator
.
getCurrentLeaf
()
->
getTargets
()
!=
particles
){
//Leaf is TypedLeaf
ContainerClass
*
particleTargets
=
octreeIterator
.
getCurrentLeaf
()
->
getTargets
();
for
(
int
idxPart
=
0
;
idxPart
<
particleTargets
->
getNbParticles
();
/*++idxPart*/
){
FPoint
currentPart
;
interface
->
getParticlePosition
(
particleTargets
,
idxPart
,
&
currentPart
);
checkPosition
(
currentPart
);
const
MortonIndex
particuleIndex
=
tree
->
getMortonFromPosition
(
currentPart
);
if
(
particuleIndex
!=
currentMortonIndex
){
//Need to move this one
interface
->
removeFromLeafAndKeep
(
particleTargets
,
currentPart
,
idxPart
,
FParticleTypeTarget
);
}
else
{
//Need to increment idx;
++
idxPart
;
}
}
}
}
while
(
octreeIterator
.
moveRight
());
printf
(
"Insert back particles
\n
"
);
//Insert back the parts that have been removed
interface
->
insertAllParticles
(
tree
);
...
...
@@ -101,7 +120,8 @@ public:
bool
workOnNext
=
true
;
do
{
// Empty leaf
if
(
octreeIterator
.
getCurrentListTargets
()
->
getNbParticles
()
==
0
){
if
(
octreeIterator
.
getCurrentListTargets
()
->
getNbParticles
()
==
0
&&
octreeIterator
.
getCurrentListSrc
()
->
getNbParticles
()
==
0
){
const
MortonIndex
currentIndex
=
octreeIterator
.
getCurrentGlobalIndex
();
workOnNext
=
octreeIterator
.
moveRight
();
tree
->
removeLeaf
(
currentIndex
);
...
...
Src/Arranger/FParticleTypedIndexedMover.hpp
0 → 100644
View file @
f5d468cc
#ifndef FPARTICULETYPEDINDEXEDMOVER_HPP
#define FPARTICULETYPEDINDEXEDMOVER_HPP
#include "FAbstractMover.hpp"
#include "../Containers/FVector.hpp"
/**
* This class should be used with the octree arranger to move particles
* that are typed (src/tgt) and stored in a FBasicParticleContainer.
*/
template <class OctreeClass, class ContainerClass>
class FParticleTypedIndexedMover : public FAbstractMover<OctreeClass, ContainerClass> {
private:
    /** One value per attribute of a single particle */
    typedef std::array<typename ContainerClass::AttributesClass, ContainerClass::NbAttributes> AttributeArray;

    /** Source particles taken out of their leaf and waiting for insertAllParticles() */
    ContainerClass toStoreRemovedSourceParts;
    /** Target particles taken out of their leaf and waiting for insertAllParticles() */
    ContainerClass toStoreRemovedTargetParts;

    /**
     * Insert every particle stored in removed into tree, tagged with type.
     * Shared by the source and the target pass of insertAllParticles() so the
     * two passes cannot drift apart.
     * @param tree the octree that receives the particles
     * @param removed the container holding the particles to re-insert (not cleared here)
     * @param type the type passed to tree->insert for each particle
     */
    static void reinsertStored(OctreeClass* tree, ContainerClass& removed, const FParticleType type){
        AttributeArray particleValues;

        for(int idxToInsert = 0 ; idxToInsert < removed.getNbParticles() ; ++idxToInsert){
            // Gather the attributes of the current particle
            for(int idxAttr = 0 ; idxAttr < ContainerClass::NbAttributes ; ++idxAttr){
                particleValues[idxAttr] = removed.getAttribute(idxAttr)[idxToInsert];
            }

            // Positions are stored as three separate arrays (x, y, z)
            const FPoint particlePos(removed.getPositions()[0][idxToInsert],
                                     removed.getPositions()[1][idxToInsert],
                                     removed.getPositions()[2][idxToInsert]);

            tree->insert(particlePos, type, removed.getIndexes()[idxToInsert], particleValues);
        }
    }

public:
    FParticleTypedIndexedMover(){
    }

    virtual ~FParticleTypedIndexedMover(){
    }

    /**
     * To get the position of the particle at idx idxPart in leaf lf
     * @param lf the container to read from
     * @param idxPart the index of the particle inside lf
     * @param particlePos output, receives the (x, y, z) position
     */
    void getParticlePosition(ContainerClass* lf, const int idxPart, FPoint* particlePos) override {
        (*particlePos) = FPoint(lf->getPositions()[0][idxPart],
                                lf->getPositions()[1][idxPart],
                                lf->getPositions()[2][idxPart]);
    }

    /**
     * Remove a particle but keep it to reinsert it later
     * @param lf the container the particle currently lives in
     * @param particlePos the position of the particle
     * @param idxPart the index of the particle inside lf
     * @param type FParticleTypeTarget stores it with the targets, any other value with the sources
     */
    void removeFromLeafAndKeep(ContainerClass* lf, const FPoint& particlePos, const int idxPart, FParticleType type) override {
        // Copy the attributes before the particle is erased from lf
        AttributeArray particleValues;
        for(int idxAttr = 0 ; idxAttr < ContainerClass::NbAttributes ; ++idxAttr){
            particleValues[idxAttr] = lf->getAttribute(idxAttr)[idxPart];
        }

        if(type == FParticleTypeTarget){
            toStoreRemovedTargetParts.push(particlePos, FParticleTypeTarget, lf->getIndexes()[idxPart], particleValues);
        }
        else{
            toStoreRemovedSourceParts.push(particlePos, FParticleTypeSource, lf->getIndexes()[idxPart], particleValues);
        }

        lf->removeParticles(&idxPart, 1);
    }

    /**
     * Reinsert the previously saved particles: first all the sources, then all
     * the targets. Both storages are emptied afterwards.
     * @param tree the octree that receives the particles
     */
    void insertAllParticles(OctreeClass* tree) override {
        reinsertStored(tree, toStoreRemovedSourceParts, FParticleTypeSource);
        reinsertStored(tree, toStoreRemovedTargetParts, FParticleTypeTarget);

        toStoreRemovedSourceParts.clear();
        toStoreRemovedTargetParts.clear();
    }
};
#endif //FPARTICULETYPEDINDEXEDMOVER_HPP
Src/CMakeLists.txt
View file @
f5d468cc
...
...
@@ -17,11 +17,35 @@ file(
./*.cpp
)
# Add CUDA files once they are compiled from cu to .o
if
(
ScalFMM_USE_CUDA
)
# Find all the CU files in my project
file
(
GLOB_RECURSE source_cu_files ./*.cu
)
# Iterate and add a build command for each file
set
(
SCALFMM_CUDA_SOURCES
""
)
FOREACH
(
_file
${
source_cu_files
}
)
GET_FILENAME_COMPONENT
(
_filewe
${
_file
}
NAME_WE
)
SET
(
_filehpp_output
${
CMAKE_CURRENT_BINARY_DIR
}
/
${
_filewe
}
.o
)
ADD_CUSTOM_COMMAND
(
OUTPUT
${
_filehpp_output
}
DEPENDS
${
_file
}
COMMAND echo ARGS Compiling
${
_filewe
}
COMMAND nvcc ARGS -c
${
_file
}
-o
${
_filehpp_output
}
${
CUSTOM_CUDA_FLAGS
}
)
LIST
(
APPEND SCALFMM_CUDA_SOURCES
${
_filehpp_output
}
)
ENDFOREACH
()
MESSAGE
(
STATUS
"SCALFMM_CUDA_SOURCES =
${
SCALFMM_CUDA_SOURCES
}
"
)
endif
()
# Adding cpp files to project
add_library
(
scalfmm
STATIC
${
source_lib_files
}
${
SCALFMM_CUDA_SOURCES
}
)
# Add blas library (even if it is set to off)
...
...
Src/Components/FBasicParticleContainer.hpp
View file @
f5d468cc
...
...
@@ -291,6 +291,16 @@ public:
nbParticles
+=
1
;
}
/**
* Push called by FTypedLeaf Through arranger
* Should have a particle position followed by isTarget flag and attributes
*/
template
<
typename
...
Args
>
void
push
(
const
FPoint
&
inParticlePosition
,
const
FParticleType
type
,
const
std
::
array
<
AttributeClass
,
NbAttributesPerParticle
>&
values
){
push
(
inParticlePosition
,
values
);
}
/**
* Push called usually by FTypedLeaf with the isTarget flag in addition
*/
...
...
Src/Components/FTestCell.hpp
View file @
f5d468cc
...
...
@@ -19,6 +19,9 @@
#include <cstddef>
#include "FBasicCell.hpp"
// To get access to descriptors
struct
FTestCellDescriptor
;
/**
* @author Berenger Bramas (berenger.bramas@inria.fr)
* @class FBasicCell*
...
...
@@ -111,6 +114,9 @@ public:
int
getSavedSizeUp
()
{
return
int
(
sizeof
(
long
long
int
));
}
// To get access to descriptor
friend
struct
FTestCellDescriptor
;
};
...
...
Src/Core/FFmmAlgorithmThreadTsm.hpp
View file @
f5d468cc
...
...
@@ -4,13 +4,13 @@
// This software is a computer program whose purpose is to compute the FMM.
//
// This software is governed by the CeCILL-C and LGPL licenses and
// abiding by the rules of distribution of free software.
//
// abiding by the rules of distribution of free software.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public and CeCILL-C Licenses for more details.
// "http://www.cecill.info".
// "http://www.cecill.info".
// "http://www.gnu.org/licenses".
// ===================================================================================
#ifndef FFMMALGORITHMTHREADTSM_HPP
...
...
@@ -22,7 +22,7 @@
#include "../Utils/FTic.hpp"
#include "../Utils/FGlobal.hpp"
#include "../Utils/FAlgorithmTimers.hpp"
#include "../Containers/FOctree.hpp"
#include "FCoreCommon.hpp"
...
...
@@ -45,7 +45,7 @@
* You should not write on sources in the P2P method!
*/
template
<
class
OctreeClass
,
class
CellClass
,
class
ContainerClass
,
class
KernelClass
,
class
LeafClass
>
class
FFmmAlgorithmThreadTsm
:
public
FAbstractAlgorithm
{
class
FFmmAlgorithmThreadTsm
:
public
FAbstractAlgorithm
,
public
FAlgorithmTimers
{
OctreeClass
*
const
tree
;
//< The octree to work on
KernelClass
**
kernels
;
//< The kernels
...
...
@@ -413,5 +413,3 @@ protected:
#endif //FFMMALGORITHMTHREADTSM_HPP
Src/GroupTree/Cuda/FCudaDeviceWrapper.cu
0 → 100644
View file @
f5d468cc
This diff is collapsed.
Click to expand it.
Src/GroupTree/Cuda/FCudaDeviceWrapper.hpp
0 → 100644
View file @
f5d468cc
// @SCALFMM_PRIVATE
#ifndef FCUDADEVICEWRAPPER_HPP
#define FCUDADEVICEWRAPPER_HPP
#include "../../Utils/FGlobal.hpp"
#include "../FOutOfBlockInteraction.hpp"
// NOTE: in every callback below only CudaKernelClass appears in the parameter
// list, so CellContainerClass, ParticleContainerGroupClass and
// ParticleGroupClass cannot be deduced and must be given explicitly by the caller.
// NOTE(review): the definitions are presumably in FCudaDeviceWrapper.cu - confirm.

/**
 * Bottom pass entry point.
 * @param leafCellsPtr raw buffer of the leaf cells group
 * @param leafCellsSize size of leafCellsPtr (presumably in bytes - confirm)
 * @param containersPtr raw buffer of the particles group
 * @param containersSize size of containersPtr (presumably in bytes - confirm)
 * @param kernel the kernel object to use
 */
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__bottomPassCallback(unsigned char* leafCellsPtr, std::size_t leafCellsSize,
                               unsigned char* containersPtr, std::size_t containersSize,
                               CudaKernelClass* kernel);

/**
 * Upward pass entry point.
 * @param currentCellsPtr raw buffer of the current cells group
 * @param currentCellsSize size of currentCellsPtr
 * @param subCellGroupsPtr raw buffers of the sub cell groups (array bound 9 in the signature)
 * @param subCellGroupsSize size of each buffer of subCellGroupsPtr
 * @param kernel the kernel object to use
 * @param nbSubCellGroups number of sub cell groups (presumably the used entries of the two arrays - confirm)
 * @param idxLevel the tree level
 */
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__upwardPassCallback(unsigned char* currentCellsPtr, std::size_t currentCellsSize,
                               unsigned char* subCellGroupsPtr[9], std::size_t subCellGroupsSize[9],
                               CudaKernelClass* kernel, int nbSubCellGroups, int idxLevel);

/**
 * Transfer pass between the current cells group and an external one (Mpi variant).
 * @param currentCellsPtr raw buffer of the current cells group
 * @param currentCellsSize size of currentCellsPtr
 * @param externalCellsPtr raw buffer of the external cells group
 * @param externalCellsSize size of externalCellsPtr
 * @param kernel the kernel object to use
 * @param idxLevel the tree level
 * @param outsideInteractions array of out-of-block interactions
 * @param nbOutsideInteractions number of entries in outsideInteractions
 */
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__transferInoutPassCallbackMpi(unsigned char* currentCellsPtr, std::size_t currentCellsSize,
                                         unsigned char* externalCellsPtr, std::size_t externalCellsSize,
                                         CudaKernelClass* kernel, int idxLevel,
                                         const OutOfBlockInteraction* outsideInteractions,
                                         int nbOutsideInteractions);

/**
 * Transfer pass restricted to the current cells group.
 * @param currentCellsPtr raw buffer of the current cells group
 * @param currentCellsSize size of currentCellsPtr
 * @param kernel the kernel object to use
 * @param idxLevel the tree level
 */
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__transferInPassCallback(unsigned char* currentCellsPtr, std::size_t currentCellsSize,
                                   CudaKernelClass* kernel, int idxLevel);

/**
 * Transfer pass between the current cells group and an external one.
 * Same parameter list as FCuda__transferInoutPassCallbackMpi.
 */
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__transferInoutPassCallback(unsigned char* currentCellsPtr, std::size_t currentCellsSize,
                                      unsigned char* externalCellsPtr, std::size_t externalCellsSize,
                                      CudaKernelClass* kernel, int idxLevel,
                                      const OutOfBlockInteraction* outsideInteractions,
                                      int nbOutsideInteractions);

/**
 * Downward pass entry point.
 * Same parameter list as FCuda__upwardPassCallback.
 */
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__downardPassCallback(unsigned char* currentCellsPtr, std::size_t currentCellsSize,
                                unsigned char* subCellGroupsPtr[9], std::size_t subCellGroupsSize[9],
                                CudaKernelClass* kernel, int nbSubCellGroups, int idxLevel);

/**
 * Direct pass between the current particles group and an external one (Mpi variant).
 * @param containersPtr raw buffer of the current particles group
 * @param containersSize size of containersPtr
 * @param externalContainersPtr raw buffer of the external particles group
 * @param externalContainersSize size of externalContainersPtr
 * @param kernel the kernel object to use
 * @param outsideInteractions array of out-of-block interactions
 * @param nbOutsideInteractions number of entries in outsideInteractions
 * @param treeHeight the height of the tree
 */
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__directInoutPassCallbackMpi(unsigned char* containersPtr, std::size_t containersSize,
                                       unsigned char* externalContainersPtr, std::size_t externalContainersSize,
                                       CudaKernelClass* kernel,
                                       const OutOfBlockInteraction* outsideInteractions,
                                       int nbOutsideInteractions, const int treeHeight);

/**
 * Direct pass restricted to the current particles group.
 * @param containersPtr raw buffer of the current particles group
 * @param containersSize size of containersPtr
 * @param kernel the kernel object to use
 * @param treeHeight the height of the tree
 */
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__directInPassCallback(unsigned char* containersPtr, std::size_t containersSize,
                                 CudaKernelClass* kernel, const int treeHeight);

/**
 * Direct pass between the current particles group and an external one.
 * Same parameter list as FCuda__directInoutPassCallbackMpi.
 */
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__directInoutPassCallback(unsigned char* containersPtr, std::size_t containersSize,
                                    unsigned char* externalContainersPtr, std::size_t externalContainersSize,
                                    CudaKernelClass* kernel,
                                    const OutOfBlockInteraction* outsideInteractions,
                                    int nbOutsideInteractions, const int treeHeight);

/**
 * Merge pass entry point.
 * Same parameter list as FCuda__bottomPassCallback.
 */
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__mergePassCallback(unsigned char* leafCellsPtr, std::size_t leafCellsSize,
                              unsigned char* containersPtr, std::size_t containersSize,
                              CudaKernelClass* kernel);

/**
 * Build a kernel object from an opaque pointer.
 * CudaKernelClass only appears in the return type, so it must be given explicitly.
 */
template <class CudaKernelClass>
CudaKernelClass* FCuda__BuildCudaKernel(void*);

/** Release a kernel object (presumably one obtained from FCuda__BuildCudaKernel - confirm) */
template <class CudaKernelClass>
void FCuda__ReleaseCudaKernel(CudaKernelClass*);
#endif
Src/GroupTree/Cuda/FCudaEmptyKernel.hpp
0 → 100644
View file @
f5d468cc
/// @SCALFMM_PRIVATE
#ifndef FCUDAEMPTYKERNEL_HPP
#define FCUDAEMPTYKERNEL_HPP
#include "FCudaGlobal.hpp"
#include "FCudaGroupAttachedLeaf.hpp"
#include "../../Components/FTestCell.hpp"
/**
* This class defines what should be a Cuda kernel.
*/
template <class ContainerClass = FCudaGroupAttachedLeaf<0, int> >
class FCudaEmptyKernel {
public:
    /** P2M: does nothing */
    __device__ void P2M(unsigned char* const /*pole*/,
                        const ContainerClass* const /*particles*/) {
    }

    /** M2M: does nothing */
    __device__ void M2M(unsigned char* const /*pole*/,
                        const unsigned char* const* const /*child*/,
                        const int /*level*/) {
    }

    /** M2L: does nothing */
    __device__ void M2L(unsigned char* const /*pole*/,
                        const unsigned char* /*distantNeighbors*/[343],
                        const int /*size*/,
                        const int /*level*/) {
    }

    /** L2L: does nothing */
    __device__ void L2L(const unsigned char* const /*local*/,
                        unsigned char** const /*child*/,
                        const int /*level*/) {
    }

    /** L2P: does nothing */
    __device__ void L2P(const unsigned char* const /*local*/,
                        ContainerClass* const /*particles*/) {
    }

    /** P2P: does nothing */
    __device__ void P2P(const int3&,
                        ContainerClass* const /*targets*/,
                        const ContainerClass* const /*sources*/,
                        ContainerClass* const /*directNeighborsParticles*/[27],
                        const int) {
    }

    /** P2PRemote: does nothing */
    __device__ void P2PRemote(const int3&,
                              ContainerClass* const /*targets*/,
                              const ContainerClass* const /*sources*/,
                              ContainerClass* const /*directNeighborsParticles*/[27],
                              const int) {
    }

    /** @return always 0, the cell buffer is not read */
    __device__ MortonIndex getMortonIndex(const unsigned char* /*cell*/) const {
        return 0;
    }

    /** @return always (0, 0, 0), the cell buffer is not read */
    __device__ int3 getCoordinate(const unsigned char* /*cell*/) const {
        int3 origin;
        origin.x = 0;
        origin.y = 0;
        origin.z = 0;
        return origin;
    }

    /** @return always nullptr, this kernel has no state to allocate */
    __host__ static FCudaEmptyKernel* InitKernelKernel(void*) {
        return nullptr;
    }

    /** Counterpart of InitKernelKernel */
    __host__ static void ReleaseKernel(FCudaEmptyKernel* /*todealloc*/) {
        // nothing to do
    }
};
#endif // FCUDAEMPTYKERNEL_HPP
Src/GroupTree/Cuda/FCudaGlobal.hpp
0 → 100644
View file @
f5d468cc
// @SCALFMM_PRIVATE
#ifndef FCUDAGLOBAL_HPP
#define FCUDAGLOBAL_HPP
#include "../../Utils/FGlobal.hpp"
// Manage special case for nvcc
#if defined(__CUDACC__) || defined(__NVCC__)
#else
#endif
#include <cuda.h>
#endif // FCUDAGLOBAL_HPP
Src/GroupTree/Cuda/FCudaGroupAttachedLeaf.hpp
0 → 100644
View file @
f5d468cc
// @SCALFMM_PRIVATE
#ifndef FCUDAGROUPATTACHEDLEAF_HPP
#define FCUDAGROUPATTACHEDLEAF_HPP
#include "FCudaGlobal.hpp"
template
<
unsigned
NbAttributesPerParticle
,
class
AttributeClass
=
FReal
>
class
FCudaGroupAttachedLeaf
{
protected:
//< Nb of particles in the current leaf
int
nbParticles
;
//< Pointers to the positions of the particles
FReal
*
positionsPointers
[
3
];
//< Pointers to the attributes of the particles
AttributeClass
*
attributes
[
NbAttributesPerParticle
];
public:
/** Empty constructor to point to nothing */
__device__ FCudaGroupAttachedLeaf()
    : nbParticles(-1) {
    // Zero both pointer tables; the byte count is taken from the arrays
    // themselves (3 position pointers, NbAttributesPerParticle attribute pointers).
    memset(positionsPointers, 0, sizeof(positionsPointers));
    memset(attributes, 0, sizeof(attributes));
}
/**
* @brief FCudaGroupAttachedLeaf
* @param inNbParticles the number of particles in the leaf
* @param inPositionBuffer the memory address of the X array of particles
* @param inLeadingPosition each position array is accessed at inPositionBuffer + inLeadingPosition*idx (offset in bytes)
* @param inAttributesBuffer the memory address of the first attribute
* @param inLeadingAttributes each attribute array is accessed at inAttributesBuffer + inLeadingAttributes*idx (offset in bytes)
*/
__device__ FCudaGroupAttachedLeaf(const int inNbParticles,
                                  FReal* inPositionBuffer, const size_t inLeadingPosition,
                                  AttributeClass* inAttributesBuffer, const size_t inLeadingAttributes)
    : nbParticles(inNbParticles) {
    // Both leading dimensions are byte offsets, so the arithmetic is done on byte pointers
    unsigned char* const positionBytes = reinterpret_cast<unsigned char*>(inPositionBuffer);
    unsigned char* const attributeBytes = reinterpret_cast<unsigned char*>(inAttributesBuffer);

    // The three position arrays are inLeadingPosition bytes apart
    for(size_t idxDim = 0 ; idxDim < 3 ; ++idxDim){
        positionsPointers[idxDim] = reinterpret_cast<FReal*>(positionBytes + inLeadingPosition * idxDim);
    }

    // The attribute arrays are inLeadingAttributes bytes apart
    for(unsigned idxAttribute = 0 ; idxAttribute < NbAttributesPerParticle ; ++idxAttribute){
        attributes[idxAttribute] = reinterpret_cast<AttributeClass*>(attributeBytes + idxAttribute * inLeadingAttributes);
    }
}
/** Copy the attached group to another one (copy the pointer not the content!) */
__device__ FCudaGroupAttachedLeaf(const FCudaGroupAttachedLeaf& other)
    : nbParticles(other.nbParticles) {
    // Only the pointers are copied: both objects refer to the same buffers
    for(int idxDim = 0 ; idxDim < 3 ; ++idxDim){
        positionsPointers[idxDim] = other.positionsPointers[idxDim];
    }
    for(unsigned idxAttribute = 0 ; idxAttribute < NbAttributesPerParticle ; ++idxAttribute){
        attributes[idxAttribute] = other.attributes[idxAttribute];
    }
}
/** Copy the attached group to another one (copy the pointer not the content!) */
__device__ FCudaGroupAttachedLeaf& operator=(const FCudaGroupAttachedLeaf& other) {
    nbParticles = other.nbParticles;

    // Only the pointers are copied: both objects refer to the same buffers
    for(int idxDim = 0 ; idxDim < 3 ; ++idxDim){
        positionsPointers[idxDim] = other.positionsPointers[idxDim];
    }
    for(unsigned idxAttribute = 0 ; idxAttribute < NbAttributesPerParticle ; ++idxAttribute){
        attributes[idxAttribute] = other.attributes[idxAttribute];
    }

    return *this;
}
/**
* @brief getNbParticles
* @return the number of particles in the leaf
*/
__device__ int getNbParticles() const {
    // -1 when the leaf was default constructed and is attached to nothing
    return this->nbParticles;
}
/**
* @brief getPositions
* @return a FReal*[3] to get access to the positions
*/
__device__ const FReal* const* getPositions() const {
    // Read-only view on the three position arrays
    return this->positionsPointers;
}
/**
* @brief getWPositions
* @return get the position in write mode
*/
__device__ FReal* const* getWPositions() {
    // The pointed values can be written, the pointer table itself cannot
    return this->positionsPointers;
}
/**
* @brief getAttribute
* @param index
* @return the attribute at index index
*/
__device__ AttributeClass* getAttribute(const int index) {
    // No bounds check: index must be a valid position in the attributes table
    return this->attributes[index];
}
/**
* @brief getAttribute
* @param index
* @return