Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
S
ScalFMM
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
5
Issues
5
List
Boards
Labels
Service Desk
Milestones
Operations
Operations
Incidents
Packages & Registries
Packages & Registries
Container Registry
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
solverstack
ScalFMM
Commits
f5d468cc
Commit
f5d468cc
authored
Feb 20, 2015
by
Florent Pruvost
Browse files
Options
Browse Files
Download
Plain Diff
merge with origin/master
parents
36bc62de
beec09ae
Changes
41
Hide whitespace changes
Inline
Side-by-side
Showing
41 changed files
with
2559 additions
and
591 deletions
+2559
-591
CMakeLists.txt
CMakeLists.txt
+28
-29
Src/Arranger/FAbstractMover.hpp
Src/Arranger/FAbstractMover.hpp
+1
-1
Src/Arranger/FOctreeArranger.hpp
Src/Arranger/FOctreeArranger.hpp
+23
-3
Src/Arranger/FParticleTypedIndexedMover.hpp
Src/Arranger/FParticleTypedIndexedMover.hpp
+77
-0
Src/CMakeLists.txt
Src/CMakeLists.txt
+24
-0
Src/Components/FBasicParticleContainer.hpp
Src/Components/FBasicParticleContainer.hpp
+10
-0
Src/Components/FTestCell.hpp
Src/Components/FTestCell.hpp
+6
-0
Src/Core/FFmmAlgorithmThreadTsm.hpp
Src/Core/FFmmAlgorithmThreadTsm.hpp
+5
-7
Src/GroupTree/Cuda/FCudaDeviceWrapper.cu
Src/GroupTree/Cuda/FCudaDeviceWrapper.cu
+665
-0
Src/GroupTree/Cuda/FCudaDeviceWrapper.hpp
Src/GroupTree/Cuda/FCudaDeviceWrapper.hpp
+67
-0
Src/GroupTree/Cuda/FCudaEmptyKernel.hpp
Src/GroupTree/Cuda/FCudaEmptyKernel.hpp
+63
-0
Src/GroupTree/Cuda/FCudaGlobal.hpp
Src/GroupTree/Cuda/FCudaGlobal.hpp
+15
-0
Src/GroupTree/Cuda/FCudaGroupAttachedLeaf.hpp
Src/GroupTree/Cuda/FCudaGroupAttachedLeaf.hpp
+155
-0
Src/GroupTree/Cuda/FCudaGroupOfCells.hpp
Src/GroupTree/Cuda/FCudaGroupOfCells.hpp
+122
-0
Src/GroupTree/Cuda/FCudaGroupOfParticles.hpp
Src/GroupTree/Cuda/FCudaGroupOfParticles.hpp
+166
-0
Src/GroupTree/Cuda/FCudaTestKernels.hpp
Src/GroupTree/Cuda/FCudaTestKernels.hpp
+153
-0
Src/GroupTree/Cuda/FCudaTreeCoordinate.hpp
Src/GroupTree/Cuda/FCudaTreeCoordinate.hpp
+167
-0
Src/GroupTree/FGroupOfCells.hpp
Src/GroupTree/FGroupOfCells.hpp
+24
-0
Src/GroupTree/FGroupSeqAlgorithm.hpp
Src/GroupTree/FGroupSeqAlgorithm.hpp
+2
-10
Src/GroupTree/FGroupTaskAlgorithm.hpp
Src/GroupTree/FGroupTaskAlgorithm.hpp
+2
-10
Src/GroupTree/FGroupTaskDepAlgorithm.hpp
Src/GroupTree/FGroupTaskDepAlgorithm.hpp
+2
-10
Src/GroupTree/FGroupTaskStarpuAlgorithm.hpp
Src/GroupTree/FGroupTaskStarpuAlgorithm.hpp
+56
-51
Src/GroupTree/FGroupTaskStarpuMpiAlgorithm.hpp
Src/GroupTree/FGroupTaskStarpuMpiAlgorithm.hpp
+66
-61
Src/GroupTree/FGroupTree.hpp
Src/GroupTree/FGroupTree.hpp
+2
-2
Src/GroupTree/FOutOfBlockInteraction.hpp
Src/GroupTree/FOutOfBlockInteraction.hpp
+17
-0
Src/GroupTree/FStarPUCpuWrapper.hpp
Src/GroupTree/FStarPUCpuWrapper.hpp
+6
-14
Src/GroupTree/FStarPUCudaWrapper.hpp
Src/GroupTree/FStarPUCudaWrapper.hpp
+118
-338
Src/GroupTree/FStarPUKernelCapacities.hpp
Src/GroupTree/FStarPUKernelCapacities.hpp
+39
-13
Src/GroupTree/FStarPUOpenClWrapper.hpp
Src/GroupTree/FStarPUOpenClWrapper.hpp
+6
-14
Src/GroupTree/FStarPUUtils.hpp
Src/GroupTree/FStarPUUtils.hpp
+22
-2
Src/Kernels/Chebyshev/FChebSymKernel.hpp
Src/Kernels/Chebyshev/FChebSymKernel.hpp
+1
-1
Src/Kernels/Chebyshev/FChebSymM2LHandler.hpp
Src/Kernels/Chebyshev/FChebSymM2LHandler.hpp
+4
-4
Src/Kernels/P2P/FP2PParticleContainer.hpp
Src/Kernels/P2P/FP2PParticleContainer.hpp
+1
-1
Src/Kernels/Rotation/FRotationCell.hpp
Src/Kernels/Rotation/FRotationCell.hpp
+1
-1
Src/ScalFmmConfig.h.cmake
Src/ScalFmmConfig.h.cmake
+2
-8
Src/Utils/FOffetOf.hpp
Src/Utils/FOffetOf.hpp
+9
-0
Src/Utils/FQuickSort.hpp
Src/Utils/FQuickSort.hpp
+4
-3
Tests/Utils/testOctreeRearrangeTsm.cpp
Tests/Utils/testOctreeRearrangeTsm.cpp
+251
-0
Tests/Utils/testTreeBuilderThread.cpp
Tests/Utils/testTreeBuilderThread.cpp
+0
-6
Tests/noDist/testBlockedWithCudaAlgorithm.cpp
Tests/noDist/testBlockedWithCudaAlgorithm.cpp
+176
-0
Utils/noDist/FmmAlgorithmTsm.cpp
Utils/noDist/FmmAlgorithmTsm.cpp
+1
-2
No files found.
CMakeLists.txt
View file @
f5d468cc
...
...
@@ -13,33 +13,15 @@ set(CMAKE_DISABLE_IN_SOURCE_BUILD ON)
#===========================================================================
project
(
ScalFMM C CXX
)
# directly make an error if in-source build
#if("${PROJECT_SOURCE_DIR}" STREQUAL "${PROJECT_BINARY_DIR}")
# message(FATAL_ERROR "In-source builds are not allowed.\n"
# "Please create a build directory first and execute cmake configuration from "
# "this directory. Example: mkdir build && cd build && cmake ..")
#endif()
#
# check if compiling into source directories
#
string(COMPARE EQUAL "${CMAKE_SOURCE_DIR}" "${CMAKE_BINARY_DIR}" insource)
#
if(insource)
#
message(FATAL_ERROR "${PROJECT_NAME} requires an out of source build. Goto ./Build and tapes cmake ../")
#
endif(insource)
string
(
COMPARE EQUAL
"
${
CMAKE_SOURCE_DIR
}
"
"
${
CMAKE_BINARY_DIR
}
"
insource
)
if
(
insource
)
message
(
FATAL_ERROR
"
${
PROJECT_NAME
}
requires an out of source build. Goto ./Build and tapes cmake ../"
)
endif
(
insource
)
set
(
ScalFMM_CMAKE_MODULE_PATH
${
CMAKE_SOURCE_DIR
}
/CMakeModules
)
# MPI option has to be set before project, cannot be changed in the cache!
#if( ScalFMM_USE_MPI )
# include(CMakeForceCompiler)
# CMAKE_FORCE_C_COMPILER(mpicc "MPI C Compiler")
# CMAKE_FORCE_CXX_COMPILER(mpicxx "MPI C++ Compiler")
# set(ScalFMM_USE_MPI ON CACHE BOOL "ScalFMM use MPI")
#else()
# message(STATUS "Remove CMake cache and run cmake .. -DScalFMM_USE_MPI=ON to enable MPI" )
#endif(ScalFMM_USE_MPI)
#===========================================================================
# Version Number
#===========================================================================
...
...
@@ -66,12 +48,6 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
endif
()
include
(
MorseInit
)
#
# Active language
# -----------------------
# enable_language(CXX)
#
#
# Options
option
(
ScalFMM_USE_MPI
"Set to ON to build ScaFMM with MPI"
OFF
)
...
...
@@ -411,6 +387,7 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
##################################################################
##################################################################
#
message
(
STATUS
"ScalFMM_USE_STARPU =
${
ScalFMM_USE_STARPU
}
"
)
if
(
ScalFMM_USE_STARPU
)
set
(
ScalFMM_STARPU_VERSION
"1.1"
CACHE STRING
"oldest STARPU version desired"
)
...
...
@@ -441,6 +418,26 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
include_directories
(
${
STARPU_INCLUDES
}
)
endif
()
option
(
ScalFMM_USE_CUDA
"Set to ON to use CUDA with StarPU"
OFF
)
message
(
STATUS
"ScalFMM_USE_CUDA =
${
ScalFMM_USE_CUDA
}
"
)
if
(
ScalFMM_USE_CUDA
)
execute_process
(
COMMAND nvcc --version ERROR_VARIABLE cuda_error_output OUTPUT_QUIET
)
if
(
cuda_error_output
)
message
(
FATAL_ERROR
"nvcc is needed with CUDA."
)
endif
()
if
(
NOT DEFINED CUSTOM_CUDA_FLAGS
)
set
(
CUSTOM_CUDA_FLAGS
"-std=c++11;-arch=sm_20"
CACHE STRING
"Set your CUDA flags, for example : -arch=sm_20;-ptxas-options=-v;-use_fast_math"
)
endif
()
# This is needed to remove backslash after space in ADD_CUSTOM_COMMAND
separate_arguments
(
CUSTOM_CUDA_FLAGS
)
message
(
STATUS
"CUSTOM_CUDA_FLAGS =
${
CUSTOM_CUDA_FLAGS
}
"
)
# Add libcudart and cuda.h
# link_directories($ENV{CUDA_LIB})
include_directories
(
$ENV{CUDA_INC}
)
set
(
SCALFMM_LIBRARIES
"
${
SCALFMM_LIBRARIES
}
; -L$ENV{CUDA_LIB}; -lcudart"
)
endif
()
message
(
STATUS
" STARPU_LIBRARIES =
${
STARPU_LIBRARIES
}
"
)
if
(
STARPU_INCLUDE_DIRS
)
message
(
STATUS
" STARPU_INCLUDES =
${
STARPU_INCLUDES
}
"
)
...
...
@@ -448,7 +445,8 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
endif
(
ScalFMM_USE_STARPU
)
list
(
APPEND FUSE_LIST
"STARPU"
)
#
list
(
APPEND FUSE_LIST
"CUDA"
)
##################################################################
# Use SSE #
##################################################################
...
...
@@ -479,6 +477,7 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
endif
(
${
COMPILE_SSE
}
)
endif
()
list
(
APPEND FUSE_LIST
"SSE"
)
##################################################################
# Use AVX #
##################################################################
...
...
Src/Arranger/FAbstractMover.hpp
View file @
f5d468cc
...
...
@@ -20,7 +20,7 @@ template<class OctreeClass,class ParticleClass>
class
FAbstractMover
{
public:
virtual
void
getParticlePosition
(
ParticleClass
*
lf
,
const
int
idxPart
,
FPoint
*
particlePos
)
=
0
;
virtual
void
removeFromLeafAndKeep
(
ParticleClass
*
lf
,
const
FPoint
&
particlePos
,
const
int
idxPart
)
=
0
;
virtual
void
removeFromLeafAndKeep
(
ParticleClass
*
lf
,
const
FPoint
&
particlePos
,
const
int
idxPart
,
FParticleType
type
)
=
0
;
virtual
void
insertAllParticles
(
OctreeClass
*
tree
)
=
0
;
};
...
...
Src/Arranger/FOctreeArranger.hpp
View file @
f5d468cc
...
...
@@ -74,6 +74,7 @@ public:
octreeIterator
.
gotoBottomLeft
();
do
{
const
MortonIndex
currentMortonIndex
=
octreeIterator
.
getCurrentGlobalIndex
();
//First we test sources
ContainerClass
*
particles
=
octreeIterator
.
getCurrentLeaf
()
->
getSrc
();
for
(
int
idxPart
=
0
;
idxPart
<
particles
->
getNbParticles
();
/*++idxPart*/
){
FPoint
currentPart
;
...
...
@@ -82,15 +83,33 @@ public:
const
MortonIndex
particuleIndex
=
tree
->
getMortonFromPosition
(
currentPart
);
if
(
particuleIndex
!=
currentMortonIndex
){
//Need to move this one
interface
->
removeFromLeafAndKeep
(
particles
,
currentPart
,
idxPart
);
interface
->
removeFromLeafAndKeep
(
particles
,
currentPart
,
idxPart
,
FParticleTypeSource
);
}
else
{
//Need to increment idx;
++
idxPart
;
}
}
//Then we test targets
if
(
octreeIterator
.
getCurrentLeaf
()
->
getTargets
()
!=
particles
){
//Leaf is TypedLeaf
ContainerClass
*
particleTargets
=
octreeIterator
.
getCurrentLeaf
()
->
getTargets
();
for
(
int
idxPart
=
0
;
idxPart
<
particleTargets
->
getNbParticles
();
/*++idxPart*/
){
FPoint
currentPart
;
interface
->
getParticlePosition
(
particleTargets
,
idxPart
,
&
currentPart
);
checkPosition
(
currentPart
);
const
MortonIndex
particuleIndex
=
tree
->
getMortonFromPosition
(
currentPart
);
if
(
particuleIndex
!=
currentMortonIndex
){
//Need to move this one
interface
->
removeFromLeafAndKeep
(
particleTargets
,
currentPart
,
idxPart
,
FParticleTypeTarget
);
}
else
{
//Need to increment idx;
++
idxPart
;
}
}
}
}
while
(
octreeIterator
.
moveRight
());
printf
(
"Insert back particles
\n
"
);
//Insert back the parts that have been removed
interface
->
insertAllParticles
(
tree
);
...
...
@@ -101,7 +120,8 @@ public:
bool
workOnNext
=
true
;
do
{
// Empty leaf
if
(
octreeIterator
.
getCurrentListTargets
()
->
getNbParticles
()
==
0
){
if
(
octreeIterator
.
getCurrentListTargets
()
->
getNbParticles
()
==
0
&&
octreeIterator
.
getCurrentListSrc
()
->
getNbParticles
()
==
0
){
const
MortonIndex
currentIndex
=
octreeIterator
.
getCurrentGlobalIndex
();
workOnNext
=
octreeIterator
.
moveRight
();
tree
->
removeLeaf
(
currentIndex
);
...
...
Src/Arranger/FParticleTypedIndexedMover.hpp
0 → 100644
View file @
f5d468cc
#ifndef FPARTICULETYPEDINDEXEDMOVER_HPP
#define FPARTICULETYPEDINDEXEDMOVER_HPP
#include "FAbstractMover.hpp"
#include "../Containers/FVector.hpp"
/**
* This class should be use with the octree arrange to move particles
* that are typed (src/tgt) and stored in a FBasicParticleContainer
*/
template
<
class
OctreeClass
,
class
ContainerClass
>
class
FParticleTypedIndexedMover
:
public
FAbstractMover
<
OctreeClass
,
ContainerClass
>
{
private:
ContainerClass
toStoreRemovedSourceParts
;
ContainerClass
toStoreRemovedTargetParts
;
public:
FParticleTypedIndexedMover
(){
}
virtual
~
FParticleTypedIndexedMover
(){
}
/** To get the position of the particle at idx idxPart in leaf lf */
void
getParticlePosition
(
ContainerClass
*
lf
,
const
int
idxPart
,
FPoint
*
particlePos
){
(
*
particlePos
)
=
FPoint
(
lf
->
getPositions
()[
0
][
idxPart
],
lf
->
getPositions
()[
1
][
idxPart
],
lf
->
getPositions
()[
2
][
idxPart
]);
}
/** Remove a particle but keep it to reinsert it later*/
void
removeFromLeafAndKeep
(
ContainerClass
*
lf
,
const
FPoint
&
particlePos
,
const
int
idxPart
,
FParticleType
type
){
std
::
array
<
typename
ContainerClass
::
AttributesClass
,
ContainerClass
::
NbAttributes
>
particleValues
;
for
(
int
idxAttr
=
0
;
idxAttr
<
ContainerClass
::
NbAttributes
;
++
idxAttr
){
particleValues
[
idxAttr
]
=
lf
->
getAttribute
(
idxAttr
)[
idxPart
];
}
if
(
type
==
FParticleTypeTarget
){
toStoreRemovedTargetParts
.
push
(
particlePos
,
FParticleTypeTarget
,
lf
->
getIndexes
()[
idxPart
],
particleValues
);
}
else
{
toStoreRemovedSourceParts
.
push
(
particlePos
,
FParticleTypeSource
,
lf
->
getIndexes
()[
idxPart
],
particleValues
);
}
lf
->
removeParticles
(
&
idxPart
,
1
);
}
/** Reinsert the previously saved particles */
void
insertAllParticles
(
OctreeClass
*
tree
){
std
::
array
<
typename
ContainerClass
::
AttributesClass
,
ContainerClass
::
NbAttributes
>
particleValues
;
for
(
int
idxToInsert
=
0
;
idxToInsert
<
toStoreRemovedSourceParts
.
getNbParticles
()
;
++
idxToInsert
){
for
(
int
idxAttr
=
0
;
idxAttr
<
ContainerClass
::
NbAttributes
;
++
idxAttr
){
particleValues
[
idxAttr
]
=
toStoreRemovedSourceParts
.
getAttribute
(
idxAttr
)[
idxToInsert
];
}
const
FPoint
particlePos
(
toStoreRemovedSourceParts
.
getPositions
()[
0
][
idxToInsert
],
toStoreRemovedSourceParts
.
getPositions
()[
1
][
idxToInsert
],
toStoreRemovedSourceParts
.
getPositions
()[
2
][
idxToInsert
]);
tree
->
insert
(
particlePos
,
FParticleTypeSource
,
toStoreRemovedSourceParts
.
getIndexes
()[
idxToInsert
],
particleValues
);
}
for
(
int
idxToInsert
=
0
;
idxToInsert
<
toStoreRemovedTargetParts
.
getNbParticles
()
;
++
idxToInsert
){
for
(
int
idxAttr
=
0
;
idxAttr
<
ContainerClass
::
NbAttributes
;
++
idxAttr
){
particleValues
[
idxAttr
]
=
toStoreRemovedTargetParts
.
getAttribute
(
idxAttr
)[
idxToInsert
];
}
const
FPoint
particlePos
(
toStoreRemovedTargetParts
.
getPositions
()[
0
][
idxToInsert
],
toStoreRemovedTargetParts
.
getPositions
()[
1
][
idxToInsert
],
toStoreRemovedTargetParts
.
getPositions
()[
2
][
idxToInsert
]);
tree
->
insert
(
particlePos
,
FParticleTypeTarget
,
toStoreRemovedTargetParts
.
getIndexes
()[
idxToInsert
],
particleValues
);
}
toStoreRemovedSourceParts
.
clear
();
toStoreRemovedTargetParts
.
clear
();
}
};
#endif //FPARTICULETYPEDINDEXEDMOVER_HPP
Src/CMakeLists.txt
View file @
f5d468cc
...
...
@@ -17,11 +17,35 @@ file(
./*.cpp
)
# Add CUDA files once they are compiled from cu to .o
if
(
ScalFMM_USE_CUDA
)
# Find all the CU files in my project
file
(
GLOB_RECURSE source_cu_files ./*.cu
)
# Iterate and add builind command for each file
set
(
SCALFMM_CUDA_SOURCES
""
)
FOREACH
(
_file
${
source_cu_files
}
)
GET_FILENAME_COMPONENT
(
_filewe
${
_file
}
NAME_WE
)
SET
(
_filehpp_output
${
CMAKE_CURRENT_BINARY_DIR
}
/
${
_filewe
}
.o
)
ADD_CUSTOM_COMMAND
(
OUTPUT
${
_filehpp_output
}
DEPENDS
${
_file
}
COMMAND echo ARGS Compiling
${
_filewe
}
COMMAND nvcc ARGS -c
${
_file
}
-o
${
_filehpp_output
}
${
CUSTOM_CUDA_FLAGS
}
)
LIST
(
APPEND SCALFMM_CUDA_SOURCES
${
_filehpp_output
}
)
ENDFOREACH
()
MESSAGE
(
STATUS
"SCALFMM_CUDA_SOURCES =
${
SCALFMM_CUDA_SOURCES
}
"
)
endif
()
# Adding cpp files to project
add_library
(
scalfmm
STATIC
${
source_lib_files
}
${
SCALFMM_CUDA_SOURCES
}
)
# Add blas library (even if it is set to off)
...
...
Src/Components/FBasicParticleContainer.hpp
View file @
f5d468cc
...
...
@@ -291,6 +291,16 @@ public:
nbParticles
+=
1
;
}
/**
* Push called by FTypedLeaf Through arranger
* Should have a particle position fallowed by isTarget flag and attributes
*/
template
<
typename
...
Args
>
void
push
(
const
FPoint
&
inParticlePosition
,
const
FParticleType
type
,
const
std
::
array
<
AttributeClass
,
NbAttributesPerParticle
>&
values
){
push
(
inParticlePosition
,
values
);
}
/**
* Push called usually by FTypedLeaf with the isTarget flag in addition
*/
...
...
Src/Components/FTestCell.hpp
View file @
f5d468cc
...
...
@@ -19,6 +19,9 @@
#include <cstddef>
#include "FBasicCell.hpp"
// To get access to descriptors
struct
FTestCellDescriptor
;
/**
* @author Berenger Bramas (berenger.bramas@inria.fr)
* @class FBasicCell*
...
...
@@ -111,6 +114,9 @@ public:
int
getSavedSizeUp
()
{
return
int
(
sizeof
(
long
long
int
));
}
// To get access to descriptor
friend
struct
FTestCellDescriptor
;
};
...
...
Src/Core/FFmmAlgorithmThreadTsm.hpp
View file @
f5d468cc
...
...
@@ -4,13 +4,13 @@
// This software is a computer program whose purpose is to compute the FMM.
//
// This software is governed by the CeCILL-C and LGPL licenses and
// abiding by the rules of distribution of free software.
//
// abiding by the rules of distribution of free software.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public and CeCILL-C Licenses for more details.
// "http://www.cecill.info".
// "http://www.cecill.info".
// "http://www.gnu.org/licenses".
// ===================================================================================
#ifndef FFMMALGORITHMTHREADTSM_HPP
...
...
@@ -22,7 +22,7 @@
#include "../Utils/FTic.hpp"
#include "../Utils/FGlobal.hpp"
#include "../Utils/FAlgorithmTimers.hpp"
#include "../Containers/FOctree.hpp"
#include "FCoreCommon.hpp"
...
...
@@ -45,7 +45,7 @@
* You should not write on sources in the P2P method!
*/
template
<
class
OctreeClass
,
class
CellClass
,
class
ContainerClass
,
class
KernelClass
,
class
LeafClass
>
class
FFmmAlgorithmThreadTsm
:
public
FAbstractAlgorithm
{
class
FFmmAlgorithmThreadTsm
:
public
FAbstractAlgorithm
,
public
FAlgorithmTimers
{
OctreeClass
*
const
tree
;
//< The octree to work on
KernelClass
**
kernels
;
//< The kernels
...
...
@@ -413,5 +413,3 @@ protected:
#endif //FFMMALGORITHMTHREADTSM_HPP
Src/GroupTree/Cuda/FCudaDeviceWrapper.cu
0 → 100644
View file @
f5d468cc
// @SCALFMM_PRIVATE
#include "FCudaDeviceWrapper.hpp"
#include "FCudaTreeCoordinate.hpp"
static
const
int
nbCudaThreads
=
32
;
static
const
int
nbCudaBlocks
=
1
;
static
void
FCudaCheckCore
(
cudaError_t
code
,
const
char
*
file
,
int
line
)
{
if
(
code
!=
cudaSuccess
)
{
fprintf
(
stderr
,
"Cuda Error %d : %s %s %d
\n
"
,
code
,
cudaGetErrorString
(
code
),
file
,
line
);
exit
(
code
);
}
}
#define FCudaCheck( test ) { FCudaCheckCore((test), __FILE__, __LINE__); }
#define FCudaCheckAfterCall() { FCudaCheckCore((cudaGetLastError()), __FILE__, __LINE__); }
#define FCudaAssertLF(ARGS) ARGS;
#define FMGetOppositeNeighIndex(index) (27-(index)-1)
#define FMGetOppositeInterIndex(index) (343-(index)-1)
#define FCudaMax(x,y) ((x)<(y) ? (y) : (x))
#define FCudaMin(x,y) ((x)>(y) ? (y) : (x))
template
<
class
CellContainerClass
,
class
ParticleContainerGroupClass
,
class
ParticleGroupClass
,
class
CudaKernelClass
>
__global__
void
FCuda__bottomPassPerform
(
unsigned
char
*
leafCellsPtr
,
std
::
size_t
leafCellsSize
,
unsigned
char
*
containersPtr
,
std
::
size_t
containersSize
,
CudaKernelClass
*
kernel
){
CellContainerClass
leafCells
(
leafCellsPtr
,
leafCellsSize
);
ParticleContainerGroupClass
containers
(
containersPtr
,
containersSize
);
const
MortonIndex
blockStartIdx
=
leafCells
.
getStartingIndex
();
const
MortonIndex
blockEndIdx
=
leafCells
.
getEndingIndex
();
for
(
MortonIndex
mindex
=
blockStartIdx
;
mindex
<
blockEndIdx
;
++
mindex
){
unsigned
char
*
cell
=
leafCells
.
getCell
(
mindex
);
if
(
cell
){
FCudaAssertLF
(
kernel
->
getMortonIndex
(
cell
)
==
mindex
);
ParticleGroupClass
particles
=
containers
.
template
getLeaf
<
ParticleGroupClass
>(
mindex
);
FCudaAssertLF
(
particles
.
isAttachedToSomething
());
kernel
->
P2M
(
cell
,
&
particles
);
}
}
}
template
<
class
CellContainerClass
,
class
ParticleContainerGroupClass
,
class
ParticleGroupClass
,
class
CudaKernelClass
>
__host__
void
FCuda__bottomPassCallback
(
unsigned
char
*
leafCellsPtr
,
std
::
size_t
leafCellsSize
,
unsigned
char
*
containersPtr
,
std
::
size_t
containersSize
,
CudaKernelClass
*
kernel
){
FCuda__bottomPassPerform
<
CellContainerClass
,
ParticleContainerGroupClass
,
ParticleGroupClass
,
CudaKernelClass
>
<<<
nbCudaThreads
,
nbCudaBlocks
,
0
/*starpu_cuda_get_local_stream()*/
>>>
(
leafCellsPtr
,
leafCellsSize
,
containersPtr
,
containersSize
,
kernel
);
FCudaCheckAfterCall
();
}
/////////////////////////////////////////////////////////////////////////////////////
/// Upward Pass
/////////////////////////////////////////////////////////////////////////////////////
template
<
class
CellContainerClass
,
class
ParticleContainerGroupClass
,
class
ParticleGroupClass
,
class
CudaKernelClass
>
__global__
void
FCuda__upwardPassPerform
(
unsigned
char
*
currentCellsPtr
,
std
::
size_t
currentCellsSize
,
unsigned
char
*
subCellGroupsPtr
[
9
],
std
::
size_t
subCellGroupsSize
[
9
],
CudaKernelClass
*
kernel
,
int
nbSubCellGroups
,
int
idxLevel
){
CellContainerClass
currentCells
(
currentCellsPtr
,
currentCellsSize
);
CellContainerClass
subCellGroups
[
9
];
for
(
int
idx
=
0
;
idx
<
nbSubCellGroups
;
++
idx
){
subCellGroups
[
idx
].
reset
(
subCellGroupsPtr
[
idx
],
subCellGroupsSize
[
idx
]);
}
FCudaAssertLF
(
nbSubCellGroups
!=
0
);
const
MortonIndex
blockStartIdx
=
FCudaMax
(
currentCells
.
getStartingIndex
(),
subCellGroups
[
0
].
getStartingIndex
()
>>
3
);
const
MortonIndex
blockEndIdx
=
FCudaMin
(
currentCells
.
getEndingIndex
(),
((
subCellGroups
[
nbSubCellGroups
-
1
].
getEndingIndex
()
-
1
)
>>
3
)
+
1
);
int
idxSubCellGroup
=
0
;
for
(
MortonIndex
mindex
=
blockStartIdx
;
mindex
<
blockEndIdx
&&
idxSubCellGroup
!=
nbSubCellGroups
;
++
mindex
){
unsigned
char
*
cell
=
currentCells
.
getCell
(
mindex
);
if
(
cell
){
FCudaAssertLF
(
kernel
->
getMortonIndex
(
cell
)
==
mindex
);
unsigned
char
*
child
[
8
]
=
{
nullptr
,
nullptr
,
nullptr
,
nullptr
,
nullptr
,
nullptr
,
nullptr
,
nullptr
};
for
(
int
idxChild
=
0
;
idxChild
<
8
;
++
idxChild
){
if
(
subCellGroups
[
idxSubCellGroup
].
getEndingIndex
()
<=
((
mindex
<<
3
)
+
idxChild
)
){
idxSubCellGroup
+=
1
;
}
if
(
idxSubCellGroup
==
nbSubCellGroups
){
break
;
}
child
[
idxChild
]
=
subCellGroups
[
idxSubCellGroup
].
getCell
((
mindex
<<
3
)
+
idxChild
);
FCudaAssertLF
(
child
[
idxChild
]
==
nullptr
||
kernel
->
getMortonIndex
(
child
[
idxChild
])
==
((
mindex
<<
3
)
+
idxChild
));
}
kernel
->
M2M
(
cell
,
child
,
idxLevel
);
}
}
}
template
<
class
CellContainerClass
,
class
ParticleContainerGroupClass
,
class
ParticleGroupClass
,
class
CudaKernelClass
>
__host__
void
FCuda__upwardPassCallback
(
unsigned
char
*
currentCellsPtr
,
std
::
size_t
currentCellsSize
,
unsigned
char
*
subCellGroupsPtr
[
9
],
std
::
size_t
subCellGroupsSize
[
9
],
CudaKernelClass
*
kernel
,
int
nbSubCellGroups
,
int
idxLevel
){
FCuda__upwardPassPerform
<
CellContainerClass
,
ParticleContainerGroupClass
,
ParticleGroupClass
,
CudaKernelClass
>
<<<
nbCudaThreads
,
nbCudaBlocks
,
0
/*starpu_cuda_get_local_stream()*/
>>>
(
currentCellsPtr
,
currentCellsSize
,
subCellGroupsPtr
,
subCellGroupsSize
,
kernel
,
nbSubCellGroups
,
idxLevel
);
FCudaCheckAfterCall
();
}
/////////////////////////////////////////////////////////////////////////////////////
/// Transfer Pass Mpi
/////////////////////////////////////////////////////////////////////////////////////
template
<
class
CellContainerClass
,
class
ParticleContainerGroupClass
,
class
ParticleGroupClass
,
class
CudaKernelClass
>
__global__
void
FCuda__transferInoutPassPerformMpi
(
unsigned
char
*
currentCellsPtr
,
std
::
size_t
currentCellsSize
,
unsigned
char
*
externalCellsPtr
,
std
::
size_t
externalCellsSize
,
CudaKernelClass
*
kernel
,
int
idxLevel
,
const
OutOfBlockInteraction
*
outsideInteractions
,
int
nbOutsideInteractions
){
CellContainerClass
currentCells
(
currentCellsPtr
,
currentCellsSize
);
CellContainerClass
cellsOther
(
externalCellsPtr
,
externalCellsSize
);
for
(
int
outInterIdx
=
0
;
outInterIdx
<
nbOutsideInteractions
;
++
outInterIdx
){
unsigned
char
*
interCell
=
cellsOther
.
getCell
(
outsideInteractions
[
outInterIdx
].
outIndex
);
if
(
interCell
){
FCudaAssertLF
(
kernel
->
getMortonIndex
(
interCell
)
==
outsideInteractions
[
outInterIdx
].
outIndex
);
unsigned
char
*
cell
=
currentCells
.
getCell
(
outsideInteractions
[
outInterIdx
].
insideIndex
);
FCudaAssertLF
(
cell
);
FCudaAssertLF
(
kernel
->
getMortonIndex
(
cell
)
==
outsideInteractions
[
outInterIdx
].
insideIndex
);
const
unsigned
char
*
interactions
[
343
];
memset
(
interactions
,
0
,
343
*
sizeof
(
unsigned
char
*
));
interactions
[
outsideInteractions
[
outInterIdx
].
outPosition
]
=
interCell
;
const
int
counter
=
1
;
kernel
->
M2L
(
cell
,
interactions
,
counter
,
idxLevel
);
}
}
}
template
<
class
CellContainerClass
,
class
ParticleContainerGroupClass
,
class
ParticleGroupClass
,
class
CudaKernelClass
>
__host__
void
FCuda__transferInoutPassCallbackMpi
(
unsigned
char
*
currentCellsPtr
,
std
::
size_t
currentCellsSize
,
unsigned
char
*
externalCellsPtr
,
std
::
size_t
externalCellsSize
,
CudaKernelClass
*
kernel
,
int
idxLevel
,
const
OutOfBlockInteraction
*
outsideInteractions
,
int
nbOutsideInteractions
){
OutOfBlockInteraction
*
cuOutsideInteractions
;
FCudaCheck
(
cudaMalloc
(
&
cuOutsideInteractions
,
nbOutsideInteractions
*
sizeof
(
OutOfBlockInteraction
))
);
FCudaCheck
(
cudaMemcpy
(
cuOutsideInteractions
,
outsideInteractions
,
nbOutsideInteractions
*
sizeof
(
OutOfBlockInteraction
),
cudaMemcpyHostToDevice
)
);
FCuda__transferInoutPassPerformMpi
<
CellContainerClass
,
ParticleContainerGroupClass
,
ParticleGroupClass
,
CudaKernelClass
>
<<<
nbCudaThreads
,
nbCudaBlocks
,
0
/*starpu_cuda_get_local_stream()*/
>>>
(
currentCellsPtr
,
currentCellsSize
,
externalCellsPtr
,
externalCellsSize
,
kernel
,
idxLevel
,
cuOutsideInteractions
,
nbOutsideInteractions
);
FCudaCheckAfterCall
();
FCudaCheck
(
cudaFree
(
cuOutsideInteractions
));
}
/////////////////////////////////////////////////////////////////////////////////////
/// Transfer Pass
/////////////////////////////////////////////////////////////////////////////////////
template
<
class
CellContainerClass
,
class
ParticleContainerGroupClass
,
class
ParticleGroupClass
,
class
CudaKernelClass
>
__global__
void
FCuda__transferInPassPerform
(
unsigned
char
*
currentCellsPtr
,
std
::
size_t
currentCellsSize
,
CudaKernelClass
*
kernel
,
int
idxLevel
){
CellContainerClass
currentCells
(
currentCellsPtr
,
currentCellsSize
);
const
MortonIndex
blockStartIdx
=
currentCells
.
getStartingIndex
();
const
MortonIndex
blockEndIdx
=
currentCells
.
getEndingIndex
();
for
(
MortonIndex
mindex
=
blockStartIdx
;
mindex
<
blockEndIdx
;
++
mindex
){
unsigned
char
*
cell
=
currentCells
.
getCell
(
mindex
);
if
(
cell
){
FCudaAssertLF
(
kernel
->
getMortonIndex
(
cell
)
==
mindex
);
MortonIndex
interactionsIndexes
[
189
];
int
interactionsPosition
[
189
];
const
int3
coord
=
(
kernel
->
getCoordinate
(
cell
));
int
counter
=
FCudaTreeCoordinate
::
GetInteractionNeighbors
(
coord
,
idxLevel
,
interactionsIndexes
,
interactionsPosition
);
const
unsigned
char
*
interactions
[
343
];
memset
(
interactions
,
0
,
343
*
sizeof
(
unsigned
char
*
));
int
counterExistingCell
=
0
;
for
(
int
idxInter
=
0
;
idxInter
<
counter
;
++
idxInter
){
if
(
blockStartIdx
<=
interactionsIndexes
[
idxInter
]
&&
interactionsIndexes
[
idxInter
]
<
blockEndIdx
){
unsigned
char
*
interCell
=
currentCells
.
getCell
(
interactionsIndexes
[
idxInter
]);
if
(
interCell
){
FCudaAssertLF
(
kernel
->
getMortonIndex
(
interCell
)
==
interactionsIndexes
[
idxInter
]);
FCudaAssertLF
(
interactions
[
interactionsPosition
[
idxInter
]]
==
nullptr
);
interactions
[
interactionsPosition
[
idxInter
]]
=
interCell
;
counterExistingCell
+=
1
;
}
}
}
kernel
->
M2L
(
cell
,
interactions
,
counterExistingCell
,
idxLevel
);
}
}
}
template
<
class
CellContainerClass
,
class
ParticleContainerGroupClass
,
class
ParticleGroupClass
,
class
CudaKernelClass
>
__host__
void
FCuda__transferInPassCallback
(
unsigned
char
*
currentCellsPtr
,
std
::
size_t
currentCellsSize
,
CudaKernelClass
*
kernel
,
int
idxLevel
){
FCuda__transferInPassPerform
<
CellContainerClass
,
ParticleContainerGroupClass
,
ParticleGroupClass
,
CudaKernelClass
>
<<<
nbCudaThreads
,
nbCudaBlocks
,
0
/*starpu_cuda_get_local_stream()*/
>>>
(
currentCellsPtr
,
currentCellsSize
,
kernel
,
idxLevel
);
FCudaCheckAfterCall
();
}
template
<
class
CellContainerClass
,
class
ParticleContainerGroupClass
,
class
ParticleGroupClass
,
class
CudaKernelClass
>
__global__
void
FCuda__transferInoutPassPerform
(
unsigned
char
*
currentCellsPtr
,
std
::
size_t
currentCellsSize
,
unsigned
char
*
externalCellsPtr
,
std
::
size_t
externalCellsSize
,
CudaKernelClass
*
kernel
,
int
idxLevel
,
const
OutOfBlockInteraction
*
outsideInteractions
,
int
nbOutsideInteractions
){
CellContainerClass
currentCells
(
currentCellsPtr
,
currentCellsSize
);
CellContainerClass
cellsOther
(
externalCellsPtr
,
externalCellsSize
);
for
(
int
outInterIdx
=
0
;
outInterIdx
<
nbOutsideInteractions
;
++
outInterIdx
){
unsigned
char
*
interCell
=
cellsOther
.
getCell
(
outsideInteractions
[
outInterIdx
].
outIndex
);
if
(
interCell
){
FCudaAssertLF
(
kernel
->
getMortonIndex
(
interCell
)
==
outsideInteractions
[
outInterIdx
].
outIndex
);
unsigned
char
*
cell
=
currentCells
.
getCell
(
outsideInteractions
[
outInterIdx
].
insideIndex
);
FCudaAssertLF
(
cell
);
FCudaAssertLF
(
kernel
->
getMortonIndex
(
cell
)
==
outsideInteractions
[
outInterIdx
].
insideIndex
);
const
unsigned
char
*
interactions
[
343
];
memset
(
interactions
,
0
,
343
*
sizeof
(
unsigned
char
*
));
interactions
[
outsideInteractions
[
outInterIdx
].
outPosition
]
=
interCell
;
const
int
counter
=
1
;
kernel
->
M2L
(
cell
,
interactions
,
counter
,
idxLevel
);
interactions
[
outsideInteractions
[
outInterIdx
].
outPosition
]
=
nullptr
;
interactions
[
FMGetOppositeInterIndex
(
outsideInteractions
[
outInterIdx
].
outPosition
)]
=
cell
;
kernel
->
M2L
(
interCell
,
interactions
,
counter
,
idxLevel
);
}
}