Commit 9930f73e authored by BLANCHARD Pierre's avatar BLANCHARD Pierre
Browse files
parents 8bd7dbf0 e9ba19e1
......@@ -199,6 +199,9 @@ public:
arrayOfUserContainer[idx] = neighbors[idx]->getContainer();
}
kernel.p2p_full(targets->getContainer(),targets->getNbParticles(),targets->getIndexes().data(),arrayOfUserContainer,indicesPerNeighbors,nbPartPerNeighbors,sourcePosition,size,userData);
delete [] nbPartPerNeighbors;
delete [] indicesPerNeighbors;
delete [] arrayOfUserContainer;
}
if(kernel.p2p_sym){
for(int idx = 0 ; ((idx < size) && (sourcePosition[idx] < 14)) ; ++idx){
......@@ -507,7 +510,7 @@ public:
position[2] = boxCorner.getZ() + currCoord.getZ()*boxwidth/double(1<<currLevel);
leaf->getSrc()->setContainer(CoreCell::GetInitLeaf()(currLevel,leaf->getSrc()->getNbParticles(),
leaf->getSrc()->getIndexes().data(), currMorton,
position, currCell->getContainer(), this->kernel));
position, currCell->getContainer(), this->kernel->getUserKernelDatas()));
});
}
......@@ -515,7 +518,8 @@ public:
void free_cell(Callback_free_cell user_cell_deallocator, Callback_free_leaf free_leaf){
octree->forEachCellLeaf([&](CoreCell * currCell, LeafClass * leaf){
free_leaf(currCell->getContainer(),leaf->getSrc()->getNbParticles(), leaf->getSrc()->getIndexes().data(),leaf,this->kernel);
free_leaf(currCell->getContainer(),leaf->getSrc()->getNbParticles(), leaf->getSrc()->getIndexes().data(),
leaf->getSrc()->getContainer(),this->kernel->getUserKernelDatas());
});
octree->forEachCell([&](CoreCell * currCell){
if(currCell->getContainer()){
......
......@@ -34,15 +34,17 @@ void cheb_free_cell(void * inCell){
* No need for leaf function
*/
void * cheb_init_leaf(int level, FSize nbParts, const FSize * idxParts, long long morton_index, double center[3],
void * cellDatas, void * userDatas){
void * cellDatas, void * userDatas){
//Do nothing
return NULL;
int * A = malloc(sizeof(double) * nbParts);
return A;
}
/**
 * Leaf deallocation callback: releases the user data block attached to a leaf.
 *
 * Only leafData is used; cellDatas, nbParts, idxParts and userDatas are ignored.
 * NOTE(review): presumably leafData is the pointer previously returned by
 * cheb_init_leaf for the same leaf - confirm against the caller.
 */
void cheb_free_leaf(void * cellDatas, FSize nbParts, const FSize * idxParts, void * leafData, void * userDatas){
free(leafData);
// free() accepts a NULL pointer, so a leaf without attached data is harmless here
}
......
......@@ -55,6 +55,7 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules/morse/
include(MorseInit)
#
# Options
#
option( SCALFMM_USE_MPI "Set to ON to build ScaFMM with MPI" OFF )
option( SCALFMM_USE_BLAS "Set to ON to build ScaFMM with BLAS" ON )
option( SCALFMM_USE_FFT "Set to ON to build ScaFMM with FFTW" ON )
......@@ -69,6 +70,16 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules/morse/
option( SCALFMM_ATTACHE_SOURCE "Set to ON to compile with -g" OFF )
option( SCALFMM_USE_ADDONS "Set to ON to compile add ons" OFF )
option( SCALFMM_USE_SIGNALS "Set to ON to catch various signal an print backtrace" OFF )
option( SCALFMM_USE_ASSERT "Set to ON to enable safe tests during execution" ON )
option( SCALFMM_USE_MIC_NATIVE "Set to ON to compile in native mode for MIC" OFF )
option( SCALFMM_ONLY_DEVEL "Set to ON to compile Development tools (only scalfmm team)" ON )
option( SCALFMM_USE_EZTRACE "Set to ON to compile with eztrace framwork" OFF )
option( SCALFMM_USE_STARPU "Set to ON to build SCALFMM with StarPU" OFF )
option( SCALFMM_BUILD_UTILS "Set to ON to build utils Tests" OFF )
#
# VECTORISATION
#
if( APPLE ) # to fix problem with GCC and avx
CMAKE_DEPENDENT_OPTION( SCALFMM_USE_SSE "Set to ON to compile with SSE support (and use intrinsec SSE P2P)" ON "CPUOPTION_SSE3;NOT CPUOPTION_AVX2" OFF )
CMAKE_DEPENDENT_OPTION( SCALFMM_USE_AVX "Set to ON to compile with AVX support (and use intrinsec AVX P2P)" OFF "CPUOPTION_AVX; NOT CPUOPTION_AVX2" OFF )
......@@ -76,24 +87,24 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules/morse/
CMAKE_DEPENDENT_OPTION( SCALFMM_USE_SSE "Set to ON to compile with SSE support (and use intrinsec SSE P2P)" ON "CPUOPTION_SSE3;NOT CPUOPTION_AVX;NOT CPUOPTION_AVX2" OFF )
CMAKE_DEPENDENT_OPTION( SCALFMM_USE_AVX "Set to ON to compile with AVX support (and use intrinsec AVX P2P)" ON "CPUOPTION_AVX; NOT CPUOPTION_AVX2" OFF )
endif(APPLE)
CMAKE_DEPENDENT_OPTION( SCALFMM_USE_AVX2 "Set to ON to compile with AVX support (and use intrinsec AVXZ P2P)" ON "CPUOPTION_AVX2" OFF )
option( SCALFMM_USE_ASSERT "Set to ON to enable safe tests during execution" ON )
option( SCALFMM_USE_MIC_NATIVE "Set to ON to compile in native mode for MIC" OFF )
option( SCALFMM_ONLY_DEVEL "Set to ON to compile Development tools (only scalfmm team)" ON )
option( SCALFMM_USE_EZTRACE "Set to ON to compile with eztrace framwork" OFF )
option( SCALFMM_USE_STARPU "Set to ON to build SCALFMM with StarPU" OFF )
CMAKE_DEPENDENT_OPTION( SCALFMM_USE_AVX2 "Set to ON to compile with AVX support (and use intrinsec AVX2 P2P)" ON "CPUOPTION_AVX2" OFF )
if( SCALFMM_ONLY_DEVEL )
# OPENMP 4/5 support
option( OPENMP_SUPPORT_COMMUTE "Set to ON to let tasks commute (KSTAR/StarPU compiler only)" OFF )
option( OPENMP_SUPPORT_PRIORITY "Set to ON to enable tasks priority (KSTAR/StarPU compiler only)" OFF )
option( SCALFMM_BUILD_UTILS "Set to ON to build utils Tests" OFF )
option( SCALFMM_DISABLE_NATIVE_OMP4 "Set to ON to disable the gcc/intel omp4" OFF )
option( SCALFMM_TIME_OMPTASKS "Set to ON to time omp4 tasks and generate output file" OFF )
option( SCALFMM_STARPU_USE_COMMUTE "Set to ON to enable commute with StarPU" ON )
option( SCALFMM_STARPU_USE_REDUX "Set to ON to enable redux with StarPU" OFF)
option( SCALFMM_STARPU_USE_PRIO "Set to ON to enable priority with StarPU" ON )
option( SCALFMM_STARPU_FORCE_NO_SCHEDULER "Set to ON to disable heteroprio even if supported" OFF )
# STARPU options
CMAKE_DEPENDENT_OPTION(SCALFMM_STARPU_USE_COMMUTE "Set to ON to enable commute with StarPU" ON "SCALFMM_USE_STARPU" OFF)
CMAKE_DEPENDENT_OPTION(SCALFMM_STARPU_USE_REDUX "Set to ON to enable redux with StarPU" OFF "SCALFMM_USE_STARPU" OFF)
CMAKE_DEPENDENT_OPTION(SCALFMM_STARPU_USE_PRIO "Set to ON to enable priority with StarPU" ON "SCALFMM_USE_STARPU" OFF)
CMAKE_DEPENDENT_OPTION(SCALFMM_STARPU_FORCE_NO_SCHEDULER "Set to ON to disable heteroprio even if supported" OFF "SCALFMM_USE_STARPU" OFF)
endif()
message(STATUS "AVANT ${CMAKE_CXX_COMPILER_ID}" )
#
# MPI
#
if( SCALFMM_USE_MPI )
try_compile(COMPILE_INTEL ${CMAKE_CURRENT_BINARY_DIR}
${SCALFMM_CMAKE_MODULE_PATH}/compileTestIntel.cpp
......
......@@ -5,7 +5,7 @@
*
* \section overV Overview
*
* ScalFMM is a software library to simulate N-body interactions using
* ScalFMM is a software/library to simulate N-body interactions using
* the Fast Multipole Method. This is a <b>kernel independent fast multipole
method</b> based on interpolation ( Chebyshev or Lagrange).
*
......@@ -46,12 +46,12 @@ method</b> based on interpolation ( Chebyshev or Lagrange).
* This software is distributed under a specific License. For more
* information, see \ref License.
*
* If you want to cite the project and/or the team, please contact
* us. See \ref contacts page.
* If you want to cite the project and/or the team, please contact us. See \ref contacts page.
*
*
* \subsection Publications
* <ul>
* <li> Fast hierarchical algorithms for generating Gaussian random fields (<a href="https://hal.inria.fr/hal-01228519">https://hal.inria.fr/hal-01228519</a>)</li>
* <li> Task-based FMM for heterogeneous architectures, Emmanuel Agullo; Berenger Bramas; Olivier Coulaud; Eric Darve; Matthias Messner; Toru Takahashi, Inria Research Report, 2014, pp. 29. <a href="http://hal.inria.fr/docs/00/97/46/74/PDF/RR-8513.pdf">RR-8513</a></li>
* <li> Task-Based FMM for Multicore Architectures, Emmanuel Agullo; Berenger Bramas; Olivier Coulaud , Eric Darve; Matthias Messner; Toru Takahashi SIAM Journal on Scientific Computing, SIAM, 2014, 36 (1), pp. 66-93
DOI : <a href="http://epubs.siam.org/doi/abs/10.1137/130915662"> 10.1137/130915662</a></li>
......
......@@ -10,9 +10,10 @@ version 1.1
1.4
-----
- Add interpolation FMM based on uniform grid points
- Add the group tree and the things related to starpu
- Add the group tree and the things related to starpu (runtime system)
- Change the P2P (it is now done by tree functions in the abstract kernel) refer to the P2P page of the doc
- Update the P2P/M2L prototype, the position of the elements is knowns from an array position
- Update the P2P/M2L prototype, the position of the elements is known from an array position
- Fix bug in SSE, AVX, ... kernel 1/r
1.3
-----
......
......@@ -23,7 +23,10 @@ make
# And access executables in scalfmm/Build/Tests/{Release,Debug}/.....
To compile with StarPU you can pass, as an example:
cmake .. -DSCALFMM_USE_STARPU=ON -DSCALFMM_USE_CUDA=OFF -DSCALFMM_USE_OPENCL=OFF -DHWLOC_DIR=/home/berenger/Téléchargements/hwloc-1.10.0/install/ -DSTARPU_DIR=/home/berenger/Téléchargements/starpu-work/StarPU/installwithfxt
cmake .. -DSCALFMM_USE_STARPU=ON -DSCALFMM_USE_CUDA=OFF -DSCALFMM_USE_OPENCL=OFF \
-DHWLOC_DIR=/home/berenger/Download/hwloc-1.10.0/install/ \
-DSTARPU_DIR=/home/berenger/Download/starpu-work/StarPU/installwithfxt
---------------------------------------------------
---------------------------------------------------
......@@ -58,14 +61,14 @@ Contact the developers at : scalfmm-public-support@lists.gforge.inria.fr
What inside :
=============
× Src : The Core of Scalfmm is under the Src directory. Users should not need to modify the source.
- Src : The Core of Scalfmm is under the Src directory. Users should not need to modify the source.
Users can implement their own kernel, or even their own parallelization, without modifying the sources.
× Data : example of particles distributions
× Examples : examples of very common usage of Scalfmm
× Doc : should contains the generated Doc
× UTests : contains some unit tests (it can be a good example to understand some features)
× Tests : examples to know how to use scalfmm/put particles in the tree/iterate on the tree...
× Utils : some scripts to work with the data files.
- Data : example of particles distributions
- Examples : examples of very common usage of Scalfmm
- Doc : should contain the generated Doc
- UTests : contains some unit tests (it can be a good example to understand some features)
- Tests : examples to know how to use scalfmm/put particles in the tree/iterate on the tree...
- Utils : some scripts to work with the data files.
---------------------------------------------------
......
......@@ -31,17 +31,8 @@
#undef priority_if_supported
#ifdef OPENMP_SUPPORT_PRIORITY
#include "../StarPUUtils/FOmpPriorities.hpp"
#define priority_if_supported(x) priority(x)
enum FGroupTaskDepAlgorithm_Priorities{
FGroupTaskDepAlgorithm_Prio_P2M = 9,
FGroupTaskDepAlgorithm_Prio_M2M = 8,
FGroupTaskDepAlgorithm_Prio_M2L_High = 7,
FGroupTaskDepAlgorithm_Prio_L2L = 6,
FGroupTaskDepAlgorithm_Prio_P2P_Big = 5,
FGroupTaskDepAlgorithm_Prio_M2L = 4,
FGroupTaskDepAlgorithm_Prio_L2P = 3,
FGroupTaskDepAlgorithm_Prio_P2P_Small = 2
};
#else
#define priority_if_supported(x)
#endif
......@@ -70,12 +61,19 @@ protected:
FTaskTimer taskTimeRecorder;
#endif
#ifdef OPENMP_SUPPORT_PRIORITY
FOmpPriorities priorities;
#endif
public:
FGroupTaskDepAlgorithm(OctreeClass*const inTree, KernelClass* inKernels, const int inMaxThreads = -1)
: MaxThreads(inMaxThreads==-1?omp_get_max_threads():inMaxThreads), tree(inTree), kernels(nullptr),
noCommuteAtLastLevel(getenv("SCALFMM_NO_COMMUTE_LAST_L2L") != NULL && getenv("SCALFMM_NO_COMMUTE_LAST_L2L")[0] != '1'?false:true)
#ifdef SCALFMM_TIME_OMPTASKS
, taskTimeRecorder(MaxThreads)
#endif
#ifdef OPENMP_SUPPORT_PRIORITY
, priorities(tree->getHeight())
#endif
{
FAssertLF(tree, "tree cannot be null");
......@@ -374,7 +372,7 @@ protected:
ParticleGroupClass* containers = tree->getParticleGroup(idxGroup);
#pragma omp task default(shared) firstprivate(leafCells, cellPoles, containers) depend(inout: cellPoles[0]) priority_if_supported(FGroupTaskDepAlgorithm_Prio_P2M)
#pragma omp task default(shared) firstprivate(leafCells, cellPoles, containers) depend(inout: cellPoles[0]) priority_if_supported(priorities.getInsertionPosP2M())
{
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(omp_get_thread_num(), &taskTimeRecorder, leafCells->getStartingIndex() * 20 * 8, "P2M"));
KernelClass*const kernel = kernels[omp_get_thread_num()];
......@@ -419,7 +417,7 @@ protected:
subCellGroup = (*iterChildCells);
subCellGroupPoles = (*iterChildCells)->getRawMultipoleBuffer();
#pragma omp task default(none) firstprivate(idxLevel, currentCells, cellPoles, subCellGroup, subCellGroupPoles) depend(commute_if_supported: cellPoles[0]) depend(in: subCellGroupPoles[0]) priority_if_supported(FGroupTaskDepAlgorithm_Prio_M2M)
#pragma omp task default(none) firstprivate(idxLevel, currentCells, cellPoles, subCellGroup, subCellGroupPoles) depend(commute_if_supported: cellPoles[0]) depend(in: subCellGroupPoles[0]) priority_if_supported(priorities.getInsertionPosM2M(idxLevel))
{
KernelClass*const kernel = kernels[omp_get_thread_num()];
const MortonIndex firstParent = FMath::Max(currentCells->getStartingIndex(), subCellGroup->getStartingIndex()>>3);
......@@ -494,7 +492,7 @@ protected:
PoleCellClass* cellPoles = currentCells->getRawMultipoleBuffer();
LocalCellClass* cellLocals = currentCells->getRawLocalBuffer();
#pragma omp task default(none) firstprivate(currentCells, cellPoles, cellLocals, idxLevel) depend(commute_if_supported: cellLocals[0]) depend(in: cellPoles[0]) priority_if_supported(idxLevel==FAbstractAlgorithm::lowerWorkingLevel-1?FGroupTaskDepAlgorithm_Prio_M2L:FGroupTaskDepAlgorithm_Prio_M2L_High)
#pragma omp task default(none) firstprivate(currentCells, cellPoles, cellLocals, idxLevel) depend(commute_if_supported: cellLocals[0]) depend(in: cellPoles[0]) priority_if_supported(priorities.getInsertionPosM2L(idxLevel))
{
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(omp_get_thread_num(), &taskTimeRecorder, ((currentCells->getStartingIndex() *20) + idxLevel ) * 8 + 2, "M2L"));
const MortonIndex blockStartIdx = currentCells->getStartingIndex();
......@@ -557,7 +555,7 @@ protected:
LocalCellClass* cellOtherLocals = cellsOther->getRawLocalBuffer();
const std::vector<OutOfBlockInteraction>* outsideInteractions = &(*currentInteractions).interactions;
#pragma omp task default(none) firstprivate(currentCells, cellLocals, outsideInteractions, cellsOther, cellOtherPoles, idxLevel) depend(commute_if_supported: cellLocals[0]) depend(in: cellOtherPoles[0]) priority_if_supported(idxLevel==FAbstractAlgorithm::lowerWorkingLevel-1?FGroupTaskDepAlgorithm_Prio_M2L:FGroupTaskDepAlgorithm_Prio_M2L_High)
#pragma omp task default(none) firstprivate(currentCells, cellLocals, outsideInteractions, cellsOther, cellOtherPoles, idxLevel) depend(commute_if_supported: cellLocals[0]) depend(in: cellOtherPoles[0]) priority_if_supported(priorities.getInsertionPosM2LExtern(idxLevel))
{
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(omp_get_thread_num(), &taskTimeRecorder, (((currentCells->getStartingIndex()+1) * (cellsOther->getStartingIndex()+2)) * 20 + idxLevel) * 8 + 3, "M2L-ext"));
KernelClass*const kernel = kernels[omp_get_thread_num()];
......@@ -573,7 +571,7 @@ protected:
}
}
#pragma omp task default(none) firstprivate(currentCells, cellPoles, outsideInteractions, cellsOther, cellOtherLocals, idxLevel) depend(commute_if_supported: cellOtherLocals[0]) depend(in: cellPoles[0]) priority_if_supported(idxLevel==FAbstractAlgorithm::lowerWorkingLevel-1?FGroupTaskDepAlgorithm_Prio_M2L:FGroupTaskDepAlgorithm_Prio_M2L_High)
#pragma omp task default(none) firstprivate(currentCells, cellPoles, outsideInteractions, cellsOther, cellOtherLocals, idxLevel) depend(commute_if_supported: cellOtherLocals[0]) depend(in: cellPoles[0]) priority_if_supported(priorities.getInsertionPosM2LExtern(idxLevel))
{
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(omp_get_thread_num(), &taskTimeRecorder, (((currentCells->getStartingIndex()+1) * (cellsOther->getStartingIndex()+1)) * 20 + idxLevel) * 8 + 3, "M2L-ext"));
KernelClass*const kernel = kernels[omp_get_thread_num()];
......@@ -633,7 +631,7 @@ protected:
subCellLocalGroupsLocal = (*iterChildCells)->getRawLocalBuffer();
if(noCommuteAtLastLevel == false || idxLevel != FAbstractAlgorithm::lowerWorkingLevel - 2){
#pragma omp task default(none) firstprivate(idxLevel, currentCells, cellLocals, subCellGroup, subCellLocalGroupsLocal) depend(commute_if_supported: subCellLocalGroupsLocal[0]) depend(in: cellLocals[0]) priority_if_supported(FGroupTaskDepAlgorithm_Prio_L2L)
#pragma omp task default(none) firstprivate(idxLevel, currentCells, cellLocals, subCellGroup, subCellLocalGroupsLocal) depend(commute_if_supported: subCellLocalGroupsLocal[0]) depend(in: cellLocals[0]) priority_if_supported(priorities.getInsertionPosL2L(idxLevel))
{
KernelClass*const kernel = kernels[omp_get_thread_num()];
......@@ -676,7 +674,7 @@ protected:
}
}
else{
#pragma omp task default(none) firstprivate(idxLevel, currentCells, cellLocals, subCellGroup, subCellLocalGroupsLocal) depend(inout: subCellLocalGroupsLocal[0]) depend(in: cellLocals[0]) priority_if_supported(FGroupTaskDepAlgorithm_Prio_L2L)
#pragma omp task default(none) firstprivate(idxLevel, currentCells, cellLocals, subCellGroup, subCellLocalGroupsLocal) depend(inout: subCellLocalGroupsLocal[0]) depend(in: cellLocals[0]) priority_if_supported(priorities.getInsertionPosL2L(idxLevel))
{
KernelClass*const kernel = kernels[omp_get_thread_num()];
......@@ -762,7 +760,7 @@ protected:
unsigned char* containersOtherDown = containersOther->getRawAttributesBuffer();
const std::vector<OutOfBlockInteraction>* outsideInteractions = &(*currentInteractions).interactions;
#pragma omp task default(none) firstprivate(containers, containersDown, containersOther, containersOtherDown, outsideInteractions) depend(commute_if_supported: containersOtherDown[0], containersDown[0]) priority_if_supported(FGroupTaskDepAlgorithm_Prio_P2P_Small)
#pragma omp task default(none) firstprivate(containers, containersDown, containersOther, containersOtherDown, outsideInteractions) depend(commute_if_supported: containersOtherDown[0], containersDown[0]) priority_if_supported(priorities.getInsertionPosP2PExtern())
{
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(omp_get_thread_num(), &taskTimeRecorder, ((containersOther->getStartingIndex()+1) * (containers->getStartingIndex()+1))*20*8 + 6, "P2P-ext"));
KernelClass*const kernel = kernels[omp_get_thread_num()];
......@@ -800,7 +798,7 @@ protected:
ParticleGroupClass* containers = (*iterParticles);
unsigned char* containersDown = containers->getRawAttributesBuffer();
#pragma omp task default(none) firstprivate(containers, containersDown) depend(commute_if_supported: containersDown[0]) priority_if_supported(FGroupTaskDepAlgorithm_Prio_P2P_Big)
#pragma omp task default(none) firstprivate(containers, containersDown) depend(commute_if_supported: containersDown[0]) priority_if_supported(priorities.getInsertionPosP2P())
{
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(omp_get_thread_num(), &taskTimeRecorder, containers->getStartingIndex()*20*8 + 5, "P2P"));
const MortonIndex blockStartIdx = containers->getStartingIndex();
......@@ -855,7 +853,7 @@ protected:
ParticleGroupClass* containers = tree->getParticleGroup(idxGroup);
unsigned char* containersDown = containers->getRawAttributesBuffer();
#pragma omp task default(shared) firstprivate(leafCells, cellLocals, containers, containersDown) depend(commute_if_supported: containersDown[0]) depend(in: cellLocals[0]) priority_if_supported(FGroupTaskDepAlgorithm_Prio_L2P)
#pragma omp task default(shared) firstprivate(leafCells, cellLocals, containers, containersDown) depend(commute_if_supported: containersDown[0]) depend(in: cellLocals[0]) priority_if_supported(priorities.getInsertionPosL2P())
{
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(omp_get_thread_num(), &taskTimeRecorder, (leafCells->getStartingIndex()*20*8) + 7, "L2P"));
KernelClass*const kernel = kernels[omp_get_thread_num()];
......
#ifndef FOMPPRIORITIES_HPP
#define FOMPPRIORITIES_HPP
#include "../../Utils/FGlobal.hpp"
/**
 * @brief Table of integer priority values, one per FMM operator.
 *
 * The constructor hands out consecutive integers starting at 0 in a fixed
 * operator order. For trees higher than 2 levels, M2L, M2L-extern and L2L
 * occupy three consecutive values at level 2, and the level-dependent getters
 * shift them by 3 for every additional level; the constructor skips the values
 * needed for those extra levels before numbering P2P, P2P-extern, the
 * last-level M2L and L2P.
 *
 * For trees of height <= 2 only P2P (shared with P2P-extern) receives a value
 * (0); every other stored position is set to -1.
 *
 * NOTE(review): P2M receives the smallest value and L2P the largest. The
 * enum this class replaces in FGroupTaskDepAlgorithm gave P2M the largest
 * value - confirm that the consumers of these numbers expect this ordering.
 */
class FOmpPriorities{
    int insertionPositionP2M = 0;
    int insertionPositionM2M = 0;
    // The two "Send" positions are numbered by the constructor but have no
    // accessor in this class.
    int insertionPositionP2MSend = 0;
    int insertionPositionM2MSend = 0;
    int insertionPositionM2L = 0;
    int insertionPositionM2LExtern = 0;
    int insertionPositionM2LLastLevel = 0;
    int insertionPositionL2L = 0;
    int insertionPositionL2P = 0;
    int insertionPositionP2P = 0;
    int insertionPositionP2PExtern = 0;

    int treeHeight;
    int maxprio = 0;

public:
    /**
     * @brief Builds the priority table for a tree of the given height.
     * @param inTreeHeight height of the octree the priorities are meant for
     */
    FOmpPriorities(const int inTreeHeight) : treeHeight(inTreeHeight) {
        if(inTreeHeight <= 2){
            // Small tree: only the P2P positions get a valid value.
            insertionPositionP2MSend = insertionPositionP2M = -1;
            insertionPositionM2MSend = insertionPositionM2M = -1;
            insertionPositionM2L = insertionPositionM2LExtern = -1;
            insertionPositionM2LLastLevel = -1;
            insertionPositionL2L = -1;
            insertionPositionL2P = -1;

            int nextPrio = 0;
            insertionPositionP2P = nextPrio++;
            insertionPositionP2PExtern = insertionPositionP2P;

            assert(nextPrio == 1);
            maxprio = nextPrio;
            return;
        }

        int nextPrio = 0;

        FLOG( FLog::Controller << "Buckets:\n" );

        insertionPositionP2MSend = nextPrio++;
        FLOG( FLog::Controller << "\t P2M Send " << insertionPositionP2MSend << "\n" );
        insertionPositionP2M = nextPrio++;
        FLOG( FLog::Controller << "\t P2M " << insertionPositionP2M << "\n" );

        insertionPositionM2MSend = nextPrio++;
        FLOG( FLog::Controller << "\t M2M Send " << insertionPositionM2MSend << "\n" );
        insertionPositionM2M = nextPrio++;
        FLOG( FLog::Controller << "\t M2M " << insertionPositionM2M << "\n" );

        // Level-2 slots of the three per-level operators (stride 3 per level).
        insertionPositionM2L = nextPrio++;
        FLOG( FLog::Controller << "\t M2L " << insertionPositionM2L << "\n" );
        insertionPositionM2LExtern = nextPrio++;
        FLOG( FLog::Controller << "\t M2L Outer " << insertionPositionM2LExtern << "\n" );
        insertionPositionL2L = nextPrio++;
        FLOG( FLog::Controller << "\t L2L " << insertionPositionL2L << "\n" );

        // Skip one (M2L, M2L extern, L2L) triple for each remaining level;
        // the level-2 triple has already been numbered above.
        const int remainingLevels = (treeHeight-3) - 1;
        nextPrio += 3 * remainingLevels;

        insertionPositionP2P = nextPrio++;
        FLOG( FLog::Controller << "\t P2P " << insertionPositionP2P << "\n" );
        insertionPositionP2PExtern = nextPrio++;
        FLOG( FLog::Controller << "\t P2P Outer " << insertionPositionP2PExtern << "\n" );

        insertionPositionM2LLastLevel = nextPrio++;
        FLOG( FLog::Controller << "\t M2L last " << insertionPositionM2LLastLevel << "\n" );

        insertionPositionL2P = nextPrio++;
        FLOG( FLog::Controller << "\t L2P " << insertionPositionL2P << "\n" );

        // 8 fixed slots plus three per-level slots for each of the (treeHeight-3) levels.
        assert(nextPrio == 8 + 3 * (treeHeight-3));
        maxprio = nextPrio;
    }

    /** @return the number of values handed out by the constructor */
    int getMaxPrio() const{
        return maxprio;
    }

    /** @return the value assigned to P2M */
    int getInsertionPosP2M() const {
        return insertionPositionP2M;
    }

    /** @return the value assigned to M2M; the level argument is ignored */
    int getInsertionPosM2M(const int /*inLevel*/) const {
        return insertionPositionM2M;
    }

    /**
     * @param inLevel tree level of the M2L
     * @return the last-level M2L value when inLevel == treeHeight-1,
     *         otherwise the level-2 M2L value shifted by 3 per level above 2
     */
    int getInsertionPosM2L(const int inLevel) const {
        if(inLevel == treeHeight-1){
            return insertionPositionM2LLastLevel;
        }
        return insertionPositionM2L + (inLevel - 2)*3;
    }

    /**
     * @param inLevel tree level of the M2L
     * @return the last-level M2L value when inLevel == treeHeight-1,
     *         otherwise the level-2 M2L-extern value shifted by 3 per level above 2
     */
    int getInsertionPosM2LExtern(const int inLevel) const {
        if(inLevel == treeHeight-1){
            return insertionPositionM2LLastLevel;
        }
        return insertionPositionM2LExtern + (inLevel - 2)*3;
    }

    /**
     * @param inLevel tree level of the L2L
     * @return the level-2 L2L value shifted by 3 per level above 2
     */
    int getInsertionPosL2L(const int inLevel) const {
        const int levelShift = (inLevel - 2)*3;
        return insertionPositionL2L + levelShift;
    }

    /** @return the value assigned to L2P */
    int getInsertionPosL2P() const {
        return insertionPositionL2P;
    }

    /** @return the value assigned to P2P */
    int getInsertionPosP2P() const {
        return insertionPositionP2P;
    }

    /** @return the value assigned to P2P-extern */
    int getInsertionPosP2PExtern() const {
        return insertionPositionP2PExtern;
    }
};
#endif // FOMPPRIORITIES_HPP
......@@ -377,23 +377,13 @@ FStarPUFmmPriorities FStarPUFmmPriorities::controller;
#elif defined(SCALFMM_STARPU_USE_PRIO)// STARPU_SUPPORT_SCHEDULER
class FStarPUFmmPriorities{
static FStarPUFmmPriorities controller;
#include "FOmpPriorities.hpp"
enum Priorities{
Prio_P2M = 9 - 5,
Prio_M2M = 8 - 5,
Prio_M2L_High = 7 - 5,
Prio_L2L = 6 - 5,
Prio_P2P_Big = 5 - 5,
Prio_M2L = 4 - 5,
Prio_L2P = 3 - 5,
Prio_P2P_Small = 2 - 5
};
int treeHeight;
class FStarPUFmmPriorities {
static FStarPUFmmPriorities controller;
FOmpPriorities ompPrio;
FStarPUFmmPriorities() : treeHeight(0){
FStarPUFmmPriorities() : ompPrio(0){
}
public:
......@@ -404,38 +394,38 @@ public:
void init(struct starpu_conf* /*conf*/, const int inTreeHeight,
FStarPUKernelCapacities* /*inCapacities*/){
treeHeight = inTreeHeight;
ompPrio = FOmpPriorities(inTreeHeight);
}
int getInsertionPosP2M() const {
return Prio_P2M;
return ompPrio.getInsertionPosP2M();
}
int getInsertionPosM2M(const int /*inLevel*/) const {
return Prio_M2M;
int getInsertionPosM2M(const int inLevel) const {
return ompPrio.getInsertionPosM2M(inLevel);
}
int getInsertionPosP2M(bool /*willBeSend*/) const {
return Prio_P2M;
return ompPrio.getInsertionPosP2M();
}
int getInsertionPosM2M(const int /*inLevel*/, bool /*willBeSend*/) const {
return Prio_M2M;
int getInsertionPosM2M(const int inLevel, bool /*willBeSend*/) const {
return ompPrio.getInsertionPosM2M(inLevel);
}
int getInsertionPosM2L(const int inLevel) const {
return inLevel == treeHeight-1 ? Prio_M2L: Prio_M2L_High;
return ompPrio.getInsertionPosM2L(inLevel);
}
int getInsertionPosM2LExtern(const int inLevel) const {
return inLevel == treeHeight-1 ? Prio_M2L : Prio_M2L_High;
return ompPrio.getInsertionPosM2LExtern(inLevel);
}
int getInsertionPosL2L(const int /*inLevel*/) const {
return Prio_L2L;
int getInsertionPosL2L(const int inLevel) const {
return ompPrio.getInsertionPosL2L(inLevel);
}
int getInsertionPosL2P() const {
return Prio_L2P;
return ompPrio.getInsertionPosL2P();
}
int getInsertionPosP2P() const {
return Prio_P2P_Big;
return ompPrio.getInsertionPosP2P();
}
int getInsertionPosP2PExtern() const {
return Prio_P2P_Small;
return ompPrio.getInsertionPosP2PExtern();
}
};
......
......@@ -41,6 +41,10 @@
* @tparam ContainerClass Type of container to store particles
* @tparam MatrixKernelClass Type of matrix kernel function
* @tparam ORDER Lagrange interpolation order
*
* Related publications:
* Fast hierarchical algorithms for generating Gaussian random fields
* (https://hal.inria.fr/hal-01228519)
*/
template < class FReal, class CellClass, class ContainerClass, class MatrixKernelClass, int ORDER, int NVALS = 1>
class FAbstractUnifKernel : public FAbstractKernels< CellClass, ContainerClass>
......
......@@ -70,9 +70,6 @@ using CostKernelClass = FDumbCostKernel<FReal, CellClass, ContainerClass,
using KernelClass = FTestKernels< CellClass, ContainerClass>;
template < template <typename...> class T, class KernelClassT>
using FmmClass = T <OctreeClass, CellClass, ContainerClass, KernelClassT, LeafClass >;
const FReal epsilon = 1e-4;
......@@ -97,7 +94,7 @@ int main(int argc, char** argv)
/* Compute the cost of each tree cell *************************************/
CostKernelClass costKernel(&tree, epsilon);
FmmClass<FFmmAlgorithmThread, CostKernelClass> costAlgo(&tree, &costKernel);
FFmmAlgorithmThread<OctreeClass, CellClass, ContainerClass, CostKernelClass, LeafClass > costAlgo(&tree, &costKernel);
costAlgo.execute();
......@@ -119,8 +116,10 @@ int main(int argc, char** argv)
std::cout << "Running kernel" << std::endl;
KernelClass computeKernel;
FmmClass<FFmmAlgorithmThreadBalanced, KernelClass> fmmAlgo(&tree, &computeKernel, costzones.getZoneBounds(), costzones.getLeafZoneBounds());
//FmmClass<FFmmAlgorithm, KernelClass> fmmAlgo(&tree, &computeKernel);
FFmmAlgorithmThreadBalanced<OctreeClass, CellClass, ContainerClass, KernelClass, LeafClass >
fmmAlgo(&tree, &computeKernel, costzones.getZoneBounds(), costzones.getLeafZoneBounds());
//FFmmAlgorithm<OctreeClass, CellClass, ContainerClass, KernelClass, LeafClass >
// fmmAlgo(&tree, &computeKernel);
fmmAlgo.execute();
/**************************************************************************/
......
......@@ -90,9 +90,8 @@ foreach(exec ${source_tests_files})
target_link_libraries(
${execname}
${scalfmm_lib}
${BLAS_LIBRARIES}
${LAPACK_LIBRARIES}
${scalfmm_lib}
${SCALFMM_LIBRARIES}
${TESTS_GCOV_LINKER_FLAGS}
)
......
......@@ -40,11 +40,11 @@ class TestExclusion : public FUTester<TestExclusion> {
for(int idxY = 0 ; idxY < Size ; ++idxY){
for(int idxZ = 0 ; idxZ < Size ; ++idxZ){
if(FP2PExclusion<Width>::GetShapeIdx(idxX,idxY,idxZ) == idxShape){
for(int idxX_neig = FMath::Max(0,idxX-1) ; idxX_neig < FMath::Min(Size,idxX+1) ; ++idxX_neig){
for(int idxY_neig = FMath::Max(0,idxY-1) ; idxY_neig < FMath::Min(Size,idxY+1) ; ++idxY_neig){
for(int idxZ_neig = FMath::Max(0,idxZ-1) ; idxZ_neig < FMath::Min(Size,idxZ+1) ; ++idxZ_neig){
for(int idxX_neig = FMath::Max(0,idxX-1) ; idxX_neig < FMath::Min(Size,idxX+2) ; ++idxX_neig){
for(int idxY_neig = FMath::Max(0,idxY-1) ; idxY_neig < FMath::Min(Size,idxY+2) ; ++idxY_neig){
for(int idxZ_neig = FMath::Max(0,idxZ-1) ; idxZ_neig < FMath::Min(Size,idxZ+2) ; ++idxZ_neig){
uassert(grid[(idxX_neig*Size + idxY_neig)*Size + idxZ_neig] == 0);
grid[grid[(idxX_neig*Size + idxY_neig)*Size + idxZ_neig]] = 1;
grid[(idxX_neig*Size + idxY_neig)*Size + idxZ_neig] = 1;
}
}
}
......@@ -69,7 +69,7 @@ class TestExclusion : public FUTester<TestExclusion> {
for(int idxY_neig = FMath::Max(0,idxY-1) ; idxY_neig < idxY ; ++idxY_neig){
for(int idxZ_neig = FMath::Max(0,idxZ-1) ; idxZ_neig < idxZ ; ++idxZ_neig){
uassert(grid[(idxX_neig*Size + idxY_neig)*Size + idxZ_neig] == 0);