Commit f5d468cc authored by Florent Pruvost's avatar Florent Pruvost

merge with origin/master

parents 36bc62de beec09ae
......@@ -13,33 +13,15 @@ set(CMAKE_DISABLE_IN_SOURCE_BUILD ON)
#===========================================================================
project(ScalFMM C CXX)
# directly make an error if in-source build
#if("${PROJECT_SOURCE_DIR}" STREQUAL "${PROJECT_BINARY_DIR}")
# message(FATAL_ERROR "In-source builds are not allowed.\n"
# "Please create a build directory first and execute cmake configuration from "
# "this directory. Example: mkdir build && cd build && cmake ..")
#endif()
#
# check if compiling into source directories
#string(COMPARE EQUAL "${CMAKE_SOURCE_DIR}" "${CMAKE_BINARY_DIR}" insource)
#if(insource)
# message(FATAL_ERROR "${PROJECT_NAME} requires an out of source build. Goto ./Build and tapes cmake ../")
#endif(insource)
# Refuse in-source builds: stop the configuration when the source directory
# and the binary directory are the same.
string(COMPARE EQUAL "${CMAKE_SOURCE_DIR}" "${CMAKE_BINARY_DIR}" insource)
if(insource)
  message(FATAL_ERROR "${PROJECT_NAME} requires an out of source build. Go to ./Build and run cmake ../")
endif()
set(ScalFMM_CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/CMakeModules)
# MPI option has to be set before project, cannot be changed in the cache!
#if( ScalFMM_USE_MPI )
# include(CMakeForceCompiler)
# CMAKE_FORCE_C_COMPILER(mpicc "MPI C Compiler")
# CMAKE_FORCE_CXX_COMPILER(mpicxx "MPI C++ Compiler")
# set(ScalFMM_USE_MPI ON CACHE BOOL "ScalFMM use MPI")
#else()
# message(STATUS "Remove CMake cache and run cmake .. -DScalFMM_USE_MPI=ON to enable MPI" )
#endif(ScalFMM_USE_MPI)
#===========================================================================
# Version Number
#===========================================================================
......@@ -66,12 +48,6 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
endif()
include(MorseInit)
#
# Active language
# -----------------------
# enable_language(CXX)
#
#
# Options
option( ScalFMM_USE_MPI "Set to ON to build ScaFMM with MPI" OFF )
......@@ -411,6 +387,7 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
##################################################################
##################################################################
#
message( STATUS "ScalFMM_USE_STARPU = ${ScalFMM_USE_STARPU}" )
if( ScalFMM_USE_STARPU )
set(ScalFMM_STARPU_VERSION "1.1" CACHE STRING "oldest STARPU version desired")
......@@ -441,6 +418,26 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
include_directories(${STARPU_INCLUDES})
endif()
# Optional CUDA support (the option text says it is meant to be used with StarPU).
option( ScalFMM_USE_CUDA "Set to ON to use CUDA with StarPU" OFF )
message( STATUS "ScalFMM_USE_CUDA = ${ScalFMM_USE_CUDA}" )
if(ScalFMM_USE_CUDA)
  # Probe for nvcc. The exit status is the reliable signal: when the executable
  # cannot be started, execute_process stores an error description in
  # RESULT_VARIABLE while the captured stderr may stay empty, and a working
  # nvcc may still print warnings on stderr.
  execute_process(COMMAND nvcc --version
                  RESULT_VARIABLE cuda_nvcc_status
                  ERROR_VARIABLE cuda_error_output
                  OUTPUT_QUIET)
  if(NOT "${cuda_nvcc_status}" STREQUAL "0")
    message( FATAL_ERROR "nvcc is needed with CUDA." )
  endif()

  # Default nvcc flags, only when the user did not provide CUSTOM_CUDA_FLAGS.
  if(NOT DEFINED CUSTOM_CUDA_FLAGS)
    set( CUSTOM_CUDA_FLAGS "-std=c++11;-arch=sm_20" CACHE STRING "Set your CUDA flags, for example : -arch=sm_20;-ptxas-options=-v;-use_fast_math")
  endif()

  # This is needed to remove backslash after space in ADD_CUSTOM_COMMAND
  separate_arguments(CUSTOM_CUDA_FLAGS)
  message( STATUS "CUSTOM_CUDA_FLAGS = ${CUSTOM_CUDA_FLAGS}" )

  # Add cuda.h and libcudart. CUDA_INC and CUDA_LIB are read from the
  # environment at configure time; an unset variable is skipped so that no
  # empty include directory or bare "-L" flag is produced.
  if(NOT "$ENV{CUDA_INC}" STREQUAL "")
    include_directories("$ENV{CUDA_INC}")
  endif()
  if(NOT "$ENV{CUDA_LIB}" STREQUAL "")
    list(APPEND SCALFMM_LIBRARIES "-L$ENV{CUDA_LIB}")
  endif()
  # Appended as a clean list element (no leading space, no empty element).
  list(APPEND SCALFMM_LIBRARIES "-lcudart")
endif()
message(STATUS " STARPU_LIBRARIES = ${STARPU_LIBRARIES}")
if (STARPU_INCLUDE_DIRS)
message(STATUS " STARPU_INCLUDES = ${STARPU_INCLUDES}")
......@@ -448,7 +445,8 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
endif(ScalFMM_USE_STARPU)
list(APPEND FUSE_LIST "STARPU")
#
list(APPEND FUSE_LIST "CUDA")
##################################################################
# Use SSE #
##################################################################
......@@ -479,6 +477,7 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
endif(${COMPILE_SSE})
endif()
list(APPEND FUSE_LIST "SSE")
##################################################################
# Use AVX #
##################################################################
......
......@@ -20,7 +20,7 @@ template<class OctreeClass,class ParticleClass>
/**
 * Interface of the objects used to move particles between leaves: a mover
 * reads a particle position, removes a particle from its container while
 * keeping a copy, and finally inserts all the kept particles into the tree.
 *
 * NOTE(review): both a 3-argument and a 4-argument removeFromLeafAndKeep are
 * declared here; this looks like the before/after pair of a diff. Confirm
 * which one is intended, since a concrete mover has to implement every pure
 * virtual declared below.
 */
class FAbstractMover{
public:
    /** Virtual destructor: movers are used through this interface, so deleting
      * one through a base pointer must run the derived destructor */
    virtual ~FAbstractMover(){}

    /** Write in particlePos the position of the particle idxPart of lf */
    virtual void getParticlePosition(ParticleClass* lf, const int idxPart, FPoint* particlePos) = 0;

    /** Remove the particle idxPart from lf but keep it to insert it later */
    virtual void removeFromLeafAndKeep(ParticleClass* lf, const FPoint& particlePos, const int idxPart) = 0;

    /** Same as above with the type (source or target) of the removed particle */
    virtual void removeFromLeafAndKeep(ParticleClass* lf, const FPoint& particlePos, const int idxPart, FParticleType type) = 0;

    /** Insert in tree all the particles that have been removed and kept */
    virtual void insertAllParticles(OctreeClass* tree) = 0;
};
......
......@@ -74,6 +74,7 @@ public:
octreeIterator.gotoBottomLeft();
do{
const MortonIndex currentMortonIndex = octreeIterator.getCurrentGlobalIndex();
//First we test sources
ContainerClass * particles = octreeIterator.getCurrentLeaf()->getSrc();
for(int idxPart = 0 ; idxPart < particles->getNbParticles(); /*++idxPart*/){
FPoint currentPart;
......@@ -82,15 +83,33 @@ public:
const MortonIndex particuleIndex = tree->getMortonFromPosition(currentPart);
if(particuleIndex != currentMortonIndex){
//Need to move this one
interface->removeFromLeafAndKeep(particles,currentPart,idxPart);
interface->removeFromLeafAndKeep(particles,currentPart,idxPart,FParticleTypeSource);
}
else{
//Need to increment idx;
++idxPart;
}
}
//Then we test targets
if(octreeIterator.getCurrentLeaf()->getTargets() != particles){ //Leaf is TypedLeaf
ContainerClass * particleTargets = octreeIterator.getCurrentLeaf()->getTargets();
for(int idxPart = 0 ; idxPart < particleTargets->getNbParticles(); /*++idxPart*/){
FPoint currentPart;
interface->getParticlePosition(particleTargets,idxPart,&currentPart);
checkPosition(currentPart);
const MortonIndex particuleIndex = tree->getMortonFromPosition(currentPart);
if(particuleIndex != currentMortonIndex){
//Need to move this one
interface->removeFromLeafAndKeep(particleTargets,currentPart,idxPart, FParticleTypeTarget);
}
else{
//Need to increment idx;
++idxPart;
}
}
}
}while(octreeIterator.moveRight());
printf("Insert back particles\n");
//Insert back the parts that have been removed
interface->insertAllParticles(tree);
......@@ -101,7 +120,8 @@ public:
bool workOnNext = true;
do{
// Empty leaf
if( octreeIterator.getCurrentListTargets()->getNbParticles() == 0 ){
if( octreeIterator.getCurrentListTargets()->getNbParticles() == 0 &&
octreeIterator.getCurrentListSrc()->getNbParticles() == 0 ){
const MortonIndex currentIndex = octreeIterator.getCurrentGlobalIndex();
workOnNext = octreeIterator.moveRight();
tree->removeLeaf( currentIndex );
......
#ifndef FPARTICULETYPEDINDEXEDMOVER_HPP
#define FPARTICULETYPEDINDEXEDMOVER_HPP
#include <array>

#include "FAbstractMover.hpp"
#include "../Containers/FVector.hpp"
/**
 * This class should be used with the octree arranger to move particles
 * that are typed (src/tgt) and indexed.
 *
 * Removed particles are copied into one of two internal containers (one for
 * the sources, one for the targets) and are inserted back into the tree,
 * with their index and attributes, by insertAllParticles().
 */
template<class OctreeClass, class ContainerClass >
class FParticleTypedIndexedMover : public FAbstractMover<OctreeClass, ContainerClass>{
private:
    /** The attribute values of one particle */
    typedef std::array<typename ContainerClass::AttributesClass, ContainerClass::NbAttributes> AttributeValues;

    /** Particles removed from a leaf, waiting to be inserted back */
    ContainerClass toStoreRemovedSourceParts;
    ContainerClass toStoreRemovedTargetParts;

    /** Copy the attribute values of the particle idxPart of container */
    static AttributeValues ExtractAttributes(ContainerClass& container, const int idxPart){
        AttributeValues particleValues;
        for(int idxAttr = 0 ; idxAttr < ContainerClass::NbAttributes ; ++idxAttr){
            particleValues[idxAttr] = container.getAttribute(idxAttr)[idxPart];
        }
        return particleValues;
    }

    /** Insert in tree, with the given type, every particle saved in stored */
    static void ReinsertStoredParticles(OctreeClass* tree, ContainerClass& stored, const FParticleType type){
        for(int idxToInsert = 0; idxToInsert < stored.getNbParticles() ; ++idxToInsert){
            const AttributeValues particleValues = ExtractAttributes(stored, idxToInsert);
            const FPoint particlePos(stored.getPositions()[0][idxToInsert],
                                     stored.getPositions()[1][idxToInsert],
                                     stored.getPositions()[2][idxToInsert]);
            tree->insert(particlePos, type, stored.getIndexes()[idxToInsert], particleValues);
        }
    }

public:
    FParticleTypedIndexedMover(){
    }

    virtual ~FParticleTypedIndexedMover(){
    }

    /** To get the position of the particle at idx idxPart in leaf lf */
    void getParticlePosition(ContainerClass* lf, const int idxPart, FPoint* particlePos) override {
        (*particlePos) = FPoint(lf->getPositions()[0][idxPart],lf->getPositions()[1][idxPart],lf->getPositions()[2][idxPart]);
    }

    /**
     * Remove a particle but keep it to reinsert it later.
     * The particle is saved with the targets when type is FParticleTypeTarget
     * and with the sources for any other value.
     */
    void removeFromLeafAndKeep(ContainerClass* lf, const FPoint& particlePos, const int idxPart, FParticleType type) override {
        const AttributeValues particleValues = ExtractAttributes(*lf, idxPart);

        if(type == FParticleTypeTarget){
            toStoreRemovedTargetParts.push(particlePos,FParticleTypeTarget,lf->getIndexes()[idxPart],particleValues);
        }
        else{
            toStoreRemovedSourceParts.push(particlePos,FParticleTypeSource,lf->getIndexes()[idxPart],particleValues);
        }

        // The copy is done, the particle can now leave its container
        lf->removeParticles(&idxPart,1);
    }

    /** Reinsert the previously saved particles (sources first, then targets)
      * and empty the two internal containers */
    void insertAllParticles(OctreeClass* tree) override {
        ReinsertStoredParticles(tree, toStoreRemovedSourceParts, FParticleTypeSource);
        ReinsertStoredParticles(tree, toStoreRemovedTargetParts, FParticleTypeTarget);

        toStoreRemovedSourceParts.clear();
        toStoreRemovedTargetParts.clear();
    }
};
#endif //FPARTICULETYPEDINDEXEDMOVER_HPP
......@@ -17,11 +17,35 @@ file(
./*.cpp
)
# Add CUDA files once they are compiled from .cu to .o:
# every .cu file found below this directory gets a custom command that runs
# nvcc, and the resulting object files are collected in SCALFMM_CUDA_SOURCES.
if(ScalFMM_USE_CUDA)
  # Find all the CU files in my project
  file(GLOB_RECURSE source_cu_files ./*.cu)

  # Iterate and add a building command for each file
  set( SCALFMM_CUDA_SOURCES "" )
  foreach(_file ${source_cu_files})
    get_filename_component(_filewe "${_file}" NAME_WE)
    # NOTE(review): the object is named after the bare file name only, so two
    # .cu files with the same name in different directories would share one
    # output path - confirm that this cannot happen in the source tree.
    set(_filehpp_output "${CMAKE_CURRENT_BINARY_DIR}/${_filewe}.o")
    # VERBATIM keeps the argument escaping identical on every platform and
    # "cmake -E echo" replaces the shell-dependent echo.
    add_custom_command(OUTPUT "${_filehpp_output}"
                       DEPENDS "${_file}"
                       COMMAND ${CMAKE_COMMAND} -E echo Compiling ${_filewe}
                       COMMAND nvcc -c "${_file}" -o "${_filehpp_output}" ${CUSTOM_CUDA_FLAGS}
                       VERBATIM)
    list(APPEND SCALFMM_CUDA_SOURCES "${_filehpp_output}")
  endforeach()

  message( STATUS "SCALFMM_CUDA_SOURCES = ${SCALFMM_CUDA_SOURCES}" )
endif()
# Static scalfmm library: the C++ sources listed in source_lib_files plus the
# entries of SCALFMM_CUDA_SOURCES (filled only when ScalFMM_USE_CUDA is ON).
add_library(scalfmm STATIC ${source_lib_files} ${SCALFMM_CUDA_SOURCES})
# Add blas library (even if it is set to off)
......
......@@ -291,6 +291,16 @@ public:
nbParticles += 1;
}
/**
* Push called by FTypedLeaf Through arranger
* Should have a particle position fallowed by isTarget flag and attributes
*/
template<typename... Args>
void push(const FPoint& inParticlePosition, const FParticleType type,
const std::array<AttributeClass , NbAttributesPerParticle>& values){
push(inParticlePosition,values);
}
/**
* Push called usually by FTypedLeaf with the isTarget flag in addition
*/
......
......@@ -19,6 +19,9 @@
#include <cstddef>
#include "FBasicCell.hpp"
// To get access to descriptors
struct FTestCellDescriptor;
/**
* @author Berenger Bramas (berenger.bramas@inria.fr)
* @class FBasicCell*
......@@ -111,6 +114,9 @@ public:
int getSavedSizeUp() {
return int(sizeof(long long int));
}
// To get access to descriptor
friend struct FTestCellDescriptor;
};
......
......@@ -4,13 +4,13 @@
// This software is a computer program whose purpose is to compute the FMM.
//
// This software is governed by the CeCILL-C and LGPL licenses and
// abiding by the rules of distribution of free software.
//
// abiding by the rules of distribution of free software.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public and CeCILL-C Licenses for more details.
// "http://www.cecill.info".
// "http://www.cecill.info".
// "http://www.gnu.org/licenses".
// ===================================================================================
#ifndef FFMMALGORITHMTHREADTSM_HPP
......@@ -22,7 +22,7 @@
#include "../Utils/FTic.hpp"
#include "../Utils/FGlobal.hpp"
#include "../Utils/FAlgorithmTimers.hpp"
#include "../Containers/FOctree.hpp"
#include "FCoreCommon.hpp"
......@@ -45,7 +45,7 @@
* You should not write on sources in the P2P method!
*/
template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass>
class FFmmAlgorithmThreadTsm : public FAbstractAlgorithm{
class FFmmAlgorithmThreadTsm : public FAbstractAlgorithm, public FAlgorithmTimers{
OctreeClass* const tree; //< The octree to work on
KernelClass** kernels; //< The kernels
......@@ -413,5 +413,3 @@ protected:
#endif //FFMMALGORITHMTHREADTSM_HPP
This diff is collapsed.
// @SCALFMM_PRIVATE
#ifndef FCUDADEVICEWRAPPER_HPP
#define FCUDADEVICEWRAPPER_HPP
#include "../../Utils/FGlobal.hpp"
#include "../FOutOfBlockInteraction.hpp"
// Declarations of the function templates that form the CUDA wrapper.
// Only declarations are given in this header; every callback receives its
// data as raw byte buffers (pointer + size in bytes) together with a pointer
// to the CUDA kernel object.
// NOTE(review): the mapping between each callback and an FMM pass is taken
// from the function names only - confirm against the definitions.

// Bottom pass: works on a group of leaf cells and on a group of particle containers.
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__bottomPassCallback(unsigned char* leafCellsPtr, std::size_t leafCellsSize,
unsigned char* containersPtr, std::size_t containersSize,
CudaKernelClass* kernel);
// Upward pass at level idxLevel: one group of cells and up to 9 groups of sub-cells
// (nbSubCellGroups presumably gives how many of the 9 entries are used - confirm).
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__upwardPassCallback(unsigned char* currentCellsPtr, std::size_t currentCellsSize,
unsigned char* subCellGroupsPtr[9], std::size_t subCellGroupsSize[9],
CudaKernelClass* kernel, int nbSubCellGroups, int idxLevel);
// Transfer pass between the current group of cells and an external group (MPI variant),
// driven by a list of nbOutsideInteractions out-of-block interactions.
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__transferInoutPassCallbackMpi(unsigned char* currentCellsPtr, std::size_t currentCellsSize,
unsigned char* externalCellsPtr, std::size_t externalCellsSize,
CudaKernelClass* kernel, int idxLevel, const OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions);
// Transfer pass restricted to the current group of cells.
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__transferInPassCallback(unsigned char* currentCellsPtr, std::size_t currentCellsSize,
CudaKernelClass* kernel, int idxLevel);
// Transfer pass between the current group of cells and an external group,
// driven by a list of nbOutsideInteractions out-of-block interactions.
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__transferInoutPassCallback(unsigned char* currentCellsPtr, std::size_t currentCellsSize,
unsigned char* externalCellsPtr, std::size_t externalCellsSize,
CudaKernelClass* kernel, int idxLevel, const OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions);
// Downward pass at level idxLevel, same arguments as the upward pass.
// (The name is spelled "downard" in the code; it is part of the interface.)
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__downardPassCallback(unsigned char* currentCellsPtr, std::size_t currentCellsSize,
unsigned char* subCellGroupsPtr[9], std::size_t subCellGroupsSize[9],
CudaKernelClass* kernel, int nbSubCellGroups, int idxLevel);
// Direct pass between a group of containers and an external group (MPI variant).
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__directInoutPassCallbackMpi(unsigned char* containersPtr, std::size_t containersSize,
unsigned char* externalContainersPtr, std::size_t externalContainersSize,
CudaKernelClass* kernel, const OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions, const int treeHeight);
// Direct pass restricted to one group of containers.
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__directInPassCallback(unsigned char* containersPtr, std::size_t containersSize,
CudaKernelClass* kernel, const int treeHeight);
// Direct pass between a group of containers and an external group.
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__directInoutPassCallback(unsigned char* containersPtr, std::size_t containersSize,
unsigned char* externalContainersPtr, std::size_t externalContainersSize,
CudaKernelClass* kernel, const OutOfBlockInteraction* outsideInteractions,
int nbOutsideInteractions, const int treeHeight);
// Merge pass: same arguments as the bottom pass.
template <class CellContainerClass, class ParticleContainerGroupClass, class ParticleGroupClass, class CudaKernelClass>
void FCuda__mergePassCallback(unsigned char* leafCellsPtr, std::size_t leafCellsSize,
unsigned char* containersPtr, std::size_t containersSize,
CudaKernelClass* kernel);
// Build a CUDA kernel object from an opaque pointer and return it.
template <class CudaKernelClass>
CudaKernelClass* FCuda__BuildCudaKernel(void*);
// Release a CUDA kernel object (presumably one returned by FCuda__BuildCudaKernel - confirm).
template <class CudaKernelClass>
void FCuda__ReleaseCudaKernel(CudaKernelClass*);
#endif
/// @SCALFMM_PRIVATE
#ifndef FCUDAEMPTYKERNEL_HPP
#define FCUDAEMPTYKERNEL_HPP
#include "FCudaGlobal.hpp"
#include "FCudaGroupAttachedLeaf.hpp"
#include "../../Components/FTestCell.hpp"
/**
 * This class defines what should be a Cuda kernel.
 * Every operator is an empty stub: the class only shows the signatures
 * of the methods, it does not compute anything.
 */
template <class ContainerClass = FCudaGroupAttachedLeaf<0, int>>
class FCudaEmptyKernel {
public:
    /** P2M stub, does nothing */
    __device__ void P2M(unsigned char* const /*pole*/, const ContainerClass* const /*particles*/) {}

    /** M2M stub, does nothing */
    __device__ void M2M(unsigned char* const /*pole*/, const unsigned char *const *const /*child*/, const int /*level*/) {}

    /** M2L stub, does nothing */
    __device__ void M2L(unsigned char* const /*pole*/, const unsigned char* /*distantNeighbors*/[343],
                        const int /*size*/, const int /*level*/) {}

    /** L2L stub, does nothing */
    __device__ void L2L(const unsigned char*const /*local*/, unsigned char* *const /*child*/, const int /*level*/) {}

    /** L2P stub, does nothing */
    __device__ void L2P(const unsigned char* const /*local*/, ContainerClass*const /*particles*/) {}

    /** P2P stub, does nothing */
    __device__ void P2P(const int3& ,
                        ContainerClass* const /*targets*/, const ContainerClass* const /*sources*/,
                        ContainerClass* const /*directNeighborsParticles*/[27], const int ) {}

    /** P2PRemote stub, does nothing */
    __device__ void P2PRemote(const int3& ,
                              ContainerClass* const /*targets*/, const ContainerClass* const /*sources*/,
                              ContainerClass* const /*directNeighborsParticles*/[27], const int ) {}

    /** Always returns the index 0 */
    __device__ MortonIndex getMortonIndex(const unsigned char* /*cell*/) const{
        return 0;
    }

    /** Always returns the coordinate (0,0,0) */
    __device__ int3 getCoordinate(const unsigned char* /*cell*/) const{
        int3 origin;
        origin.x = 0;
        origin.y = 0;
        origin.z = 0;
        return origin;
    }

    /** Nothing is built for the empty kernel: returns a null pointer */
    __host__ static FCudaEmptyKernel* InitKernelKernel(void*){
        return nullptr;
    }

    /** Nothing has been allocated, so there is nothing to release */
    __host__ static void ReleaseKernel(FCudaEmptyKernel* /*todealloc*/){
    }
};
#endif // FCUDAEMPTYKERNEL_HPP
// @SCALFMM_PRIVATE
#ifndef FCUDAGLOBAL_HPP
#define FCUDAGLOBAL_HPP
#include "../../Utils/FGlobal.hpp"
// Manage special case for nvcc
// NOTE(review): both branches of the conditional below are empty, so it has
// no effect for now; it looks like a placeholder for nvcc specific code.
#if defined(__CUDACC__) || defined(__NVCC__)
#else
#endif
// Included in both cases (with or without nvcc)
#include <cuda.h>
#endif // FCUDAGLOBAL_HPP
// @SCALFMM_PRIVATE
#ifndef FCUDAGROUPATTACHEDLEAF_HPP
#define FCUDAGROUPATTACHEDLEAF_HPP
#include "FCudaGlobal.hpp"
/**
 * A leaf attached to an existing memory block: the object stores the number
 * of particles, three pointers to the position arrays and one pointer per
 * attribute array. It only stores pointers, copying a leaf copies the
 * pointers and not the particles.
 */
template <unsigned NbAttributesPerParticle, class AttributeClass = FReal>
class FCudaGroupAttachedLeaf {
protected:
    //< Nb of particles in the current leaf (-1 when attached to nothing)
    int nbParticles;
    //< Pointers to the positions of the particles
    FReal* positionsPointers[3];
    //< Pointers to the attributes of the particles
    AttributeClass* attributes[NbAttributesPerParticle];

    /** Point to the same position and attribute arrays as source */
    __device__ void sharePointersOf(const FCudaGroupAttachedLeaf& source){
        for(int idxDim = 0 ; idxDim < 3 ; ++idxDim){
            positionsPointers[idxDim] = source.positionsPointers[idxDim];
        }
        for(unsigned idxAttribute = 0 ; idxAttribute < NbAttributesPerParticle ; ++idxAttribute){
            attributes[idxAttribute] = source.attributes[idxAttribute];
        }
    }

public:
    /** Empty constructor to point to nothing */
    __device__ FCudaGroupAttachedLeaf() : nbParticles(-1) {
        for(int idxDim = 0 ; idxDim < 3 ; ++idxDim){
            positionsPointers[idxDim] = nullptr;
        }
        for(unsigned idxAttribute = 0 ; idxAttribute < NbAttributesPerParticle ; ++idxAttribute){
            attributes[idxAttribute] = nullptr;
        }
    }

    /**
     * @brief FCudaGroupAttachedLeaf
     * @param inNbParticles the number of particles in the leaf
     * @param inPositionBuffer the memory address of the X array of particles
     * @param inLeadingPosition distance in bytes between two consecutive position arrays
     * @param inAttributesBuffer the memory address of the first attribute
     * @param inLeadingAttributes distance in bytes between two consecutive attribute arrays
     */
    __device__ FCudaGroupAttachedLeaf(const int inNbParticles, FReal* inPositionBuffer, const size_t inLeadingPosition,
                                      AttributeClass* inAttributesBuffer, const size_t inLeadingAttributes)
        : nbParticles(inNbParticles){
        // The offsets are given in bytes, so the arithmetic is done on byte pointers
        unsigned char* const positionBytes = reinterpret_cast<unsigned char*>(inPositionBuffer);
        for(size_t idxDim = 0 ; idxDim < 3 ; ++idxDim){
            positionsPointers[idxDim] = reinterpret_cast<FReal*>(positionBytes + inLeadingPosition*idxDim);
        }

        unsigned char* const attributeBytes = reinterpret_cast<unsigned char*>(inAttributesBuffer);
        for(unsigned idxAttribute = 0 ; idxAttribute < NbAttributesPerParticle ; ++idxAttribute){
            attributes[idxAttribute] = reinterpret_cast<AttributeClass*>(attributeBytes + idxAttribute*inLeadingAttributes);
        }
    }

    /** Copy the attached group to another one (copy the pointer not the content!) */
    __device__ FCudaGroupAttachedLeaf(const FCudaGroupAttachedLeaf& other) : nbParticles(other.nbParticles) {
        sharePointersOf(other);
    }

    /** Copy the attached group to another one (copy the pointer not the content!) */
    __device__ FCudaGroupAttachedLeaf& operator=(const FCudaGroupAttachedLeaf& other){
        nbParticles = other.nbParticles;
        sharePointersOf(other);
        return *this;
    }

    /**
     * @brief getNbParticles
     * @return the number of particles in the leaf
     */
    __device__ int getNbParticles() const{
        return nbParticles;
    }

    /**
     * @brief getPositions
     * @return a FReal*[3] to get access to the positions (read only)
     */
    __device__ const FReal*const* getPositions() const {
        return positionsPointers;
    }

    /**
     * @brief getWPositions
     * @return a FReal*[3] to get access to the positions in write mode
     */
    __device__ FReal* const* getWPositions() {
        return positionsPointers;
    }

    /**
     * @brief getAttribute
     * @param index the position of the attribute (not checked)
     * @return the array of the attribute at position index
     */
    __device__ AttributeClass* getAttribute(const int index) {
        return attributes[index];
    }

    /**
     * @brief getAttribute
     * @param index the position of the attribute (not checked)
     * @return the array of the attribute at position index (read only)
     */
    __device__ const AttributeClass* getAttribute(const int index) const {
        return attributes[index];
    }

    /**
     * Get the attribute with an index known (and checked) at compile time
     */
    template <int index>
    __device__ AttributeClass* getAttribute() {
        static_assert(index < NbAttributesPerParticle, "Index to get attributes is out of scope.");
        return attributes[index];
    }

    /**
     * Get the attribute (read only) with an index known (and checked) at compile time
     */
    template <int index>
    __device__ const AttributeClass* getAttribute() const {
        static_assert(index < NbAttributesPerParticle, "Index to get attributes is out of scope.");
        return attributes[index];
    }

    /** Return true if it has been attached to a memory block */
    __device__ bool isAttachedToSomething() const {
        return nbParticles != -1;
    }

    /** Copy data for one particle (from the ParticleClassContainer into the attached buffer) */
    template<class ParticleClassContainer>
    __device__ void setParticle(const int destPartIdx, const int srcPartIdx, const ParticleClassContainer* particles){
        // Positions, one dimension after the other
        for(int idxDim = 0 ; idxDim < 3 ; ++idxDim){
            positionsPointers[idxDim][destPartIdx] = particles->getPositions()[idxDim][srcPartIdx];
        }
        // Then every attribute
        for(unsigned idxAttribute = 0 ; idxAttribute < NbAttributesPerParticle ; ++idxAttribute){
            attributes[idxAttribute][destPartIdx] = particles->getAttribute(idxAttribute)[srcPartIdx];
        }
    }
};
#endif // FCUDAGROUPATTACHEDLEAF_HPP
// @SCALFMM_PRIVATE
#ifndef FCUDAGROUPOFCELLS_HPP
#define FCUDAGROUPOFCELLS_HPP
#include "FCudaGlobal.hpp"
/**
* @brief The FCudaGroupOfCells class manages the cells in block allocation.
*/
template <const size_t CellClassSize>
class FCudaGroupOfCells {
/** One header is allocated at the beginning of each block */
struct BlockHeader{
MortonIndex startingIndex;
MortonIndex endingIndex;
int numberOfCellsInBlock;
int blockIndexesTableSize;
};
protected:
//< The size of the memoryBuffer
int allocatedMemoryInByte;
//< Pointer to a block memory
unsigned char* memoryBuffer;
//< Pointer to the header inside the block memory
BlockHeader* blockHeader;
//< Pointer to the indexes table inside the block memory
int* blockIndexesTable;
//< Pointer to the cells inside the block memory
unsigned char* blockCells;
//< This value is for not used cells
static const MortonIndex CellIsEmptyFlag = -1;
public:
__device__ FCudaGroupOfCells()
: allocatedMemoryInByte(0), memoryBuffer(nullptr),
blockHeader(nullptr), blockIndexesTable(nullptr), blockCells(nullptr){
}
__device__ void reset(unsigned char* inBuffer, const size_t inAllocatedMemoryInByte){
// Move the pointers to the correct position
allocatedMemoryInByte = (inAllocatedMemoryInByte);
memoryBuffer = (inBuffer);