Commit 2bec21ce authored by BRAMAS Berenger's avatar BRAMAS Berenger
parents 24f65ca7 ef4faa86
......@@ -31,6 +31,7 @@
#include "Arranger/FArrangerPeriodic.hpp"
#include "Arranger/FBasicParticleContainerIndexedMover.hpp"
#include "Arranger/FParticleTypedIndexedMover.hpp"
#include "Extensions/FExtendCellType.hpp"
#include "Core/FFmmAlgorithmThread.hpp"
#include "Core/FFmmAlgorithm.hpp"
......@@ -38,6 +39,7 @@
#include "Core/FFmmAlgorithmThreadTsm.hpp"
/**
* @class FInterEngine implements API for Interpolations kernels, its
* templates can be ChebCell/ChebKernel or UnifCell/UnifKernel
......@@ -63,6 +65,7 @@ private:
// ArrangerClass * arranger;
public:
/**
* @brief Constructor : build the tree and the interpolation
......@@ -72,9 +75,10 @@ public:
* @param BoxCenter double[3] coordinate of the center of the
* simulation box
*/
FInterEngine(scalfmm_kernel_type KernelType) :
FInterEngine(scalfmm_kernel_type KernelType, scalfmm_algorithm algo) :
kernel(nullptr), matrix(nullptr), octree(nullptr)/*,arranger(nullptr)*/{
FScalFMMEngine<FReal>::kernelType = KernelType;
FScalFMMEngine<FReal>::Algorithm = algo;
}
void build_tree(int TreeHeight, FReal BoxWidth , FReal * BoxCenter,User_Scalfmm_Cell_Descriptor notUsedHere){
......@@ -760,11 +764,13 @@ public:
}
case 3:
{
// typedef FFmmAlgorithmThreadTsm<OctreeClass,InterCell,ContainerClass,InterKernel,LeafClass> AlgoClassTargetSource;
// AlgoClassTargetSource* algoTS = new AlgoClassTargetSource(octree,kernel);
// algoTS->execute();
// FScalFMMEngine<FReal>::algoTimer = algoTS;
// break;
// class local : public InterCell, public unExtendedCell{
// };
typedef FFmmAlgorithmThreadTsm<OctreeClass,InterCell,ContainerClass,InterKernel,LeafClass> AlgoClassTargetSource;
AlgoClassTargetSource* algoTS = new AlgoClassTargetSource(octree,kernel);
algoTS->execute();
FScalFMMEngine<FReal>::algoTimer = algoTS;
break;
}
default :
std::cout<< "No algorithm found (probably for strange reasons) : "<< FScalFMMEngine<FReal>::Algorithm <<" exiting" << std::endl;
......
......@@ -30,7 +30,7 @@ extern "C" scalfmm_handle scalfmm_init(/*int TreeHeight,double BoxWidth,double*
typedef FInterpMatrixKernelR<FReal> MatrixKernelClass;
typedef FChebSymKernel<FReal,ChebCell,ContainerClass,MatrixKernelClass,7> ChebKernel;
handle->engine = new FInterEngine<FReal,ChebCell,ChebKernel,LeafClass>(/*TreeHeight,BoxWidth,BoxCenter, */KernelType);
handle->engine = new FInterEngine<FReal,ChebCell,ChebKernel,LeafClass>(/*TreeHeight,BoxWidth,BoxCenter, */KernelType,algo);
break;
// case 2:
// //TODO typedefs
......@@ -61,13 +61,14 @@ extern "C" scalfmm_handle scalfmm_init(/*int TreeHeight,double BoxWidth,double*
case 1:
//TODO typedefs
typedef FP2PParticleContainerIndexed<FReal> ContainerClass;
typedef FChebCell<FReal,7> ChebCell;
//typedef FChebCell<FReal,7> ChebCell;
typedef FTypedChebCell<FReal,7> ChebCell;
typedef FSimpleLeaf<FReal,ContainerClass> LeafClass;
typedef FInterpMatrixKernelR<FReal> MatrixKernelClass;
typedef FChebSymKernel<FReal,ChebCell,ContainerClass,MatrixKernelClass,7> ChebKernel;
handle->engine = new FInterEngine<FReal,ChebCell,ChebKernel,LeafClass>(/*TreeHeight,BoxWidth,BoxCenter, */KernelType);
handle->engine = new FInterEngine<FReal,ChebCell,ChebKernel,LeafClass>(/*TreeHeight,BoxWidth,BoxCenter, */KernelType,algo);
break;
// case 2:
// //TODO typedefs
......
......@@ -108,6 +108,7 @@ public:
if(kernel.m2m){
for(int idx = 0 ; idx < 8 ; ++idx){
if( children[idx] ){
printf("lvl : %d\n",level);
kernel.m2m(level, cell->getContainer(), idx, children[idx]->getContainer(), userData);
}
}
......@@ -253,6 +254,7 @@ public:
void build_tree(int TreeHeight,double BoxWidth,double* BoxCenter,Scalfmm_Cell_Descriptor user_cell_descriptor){
CoreCell::Init(user_cell_descriptor);
printf("Tree Height : %d \n",TreeHeight);
this->octree = new OctreeClass(TreeHeight,FMath::Min(3,TreeHeight-1),BoxWidth,FPoint<FReal>(BoxCenter));
}
......
#ifndef TIMERS_H
#define TIMERS_H
#include <time.h>
#include <sys/time.h>
/**
* @brief Wrapper on timeval struct
*/
typedef struct timer{
struct timeval start;
struct timeval end;
}Timer;
/**
* @brief monitoring function (init start)
*/
void tic(Timer* inTimer){
gettimeofday(&inTimer->start, NULL);
}
/**
* @brief monitoring function (init end)
*/
void tac(Timer* inTimer){
gettimeofday(&inTimer->end, NULL);
}
/**
* @brief monitoring function (return elapsed time in micro second)
*/
long int get_elapsed(Timer* inTimer){
return ((inTimer->end.tv_sec * 1000000 + inTimer->end.tv_usec)
- (inTimer->start.tv_sec * 1000000 + inTimer->start.tv_usec));
}
/**
* @brief monitoring function (print elapsed)
*/
void print_elapsed(Timer* inTimer){
long int elapsed = get_elapsed(inTimer);
printf("Elapsed : %ld us (or %f seconds)\n",elapsed,elapsed/1000000.0);
}
/**
* @brief monitoring function (print difference :: First-Second)
*/
void print_difference_elapsed(Timer* inTimer1,Timer*inTimer2){
long int diff = get_elapsed(inTimer1)-get_elapsed(inTimer2);
printf("Timer Difference : %ld us (or %f seconds)\n",diff,(double)diff/1000000.0);
}
#endif
......@@ -4,8 +4,7 @@
#include <math.h>
//For timing monitoring
#include <time.h>
#include <sys/time.h>
#include "Timers.h"
#include "../Src/CScalfmmApi.h"
......@@ -60,52 +59,6 @@ void cheb_resetCell(int level, long long morton_index, int* tree_position, doubl
}
/**
* @brief Wrapper on timeval struct
*/
typedef struct timer{
struct timeval start;
struct timeval end;
}Timer;
/**
* @brief monitoring function (init start)
*/
void tic(Timer* inTimer){
gettimeofday(&inTimer->start, NULL);
}
/**
* @brief monitoring function (init end)
*/
void tac(Timer* inTimer){
gettimeofday(&inTimer->end, NULL);
}
/**
* @brief monitoring function (return elapsed time in micro second)
*/
long int get_elapsed(Timer* inTimer){
return ((inTimer->end.tv_sec * 1000000 + inTimer->end.tv_usec)
- (inTimer->start.tv_sec * 1000000 + inTimer->start.tv_usec));
}
/**
* @brief monitoring function (print elapsed)
*/
void print_elapsed(Timer* inTimer){
long int elapsed = get_elapsed(inTimer);
printf("Elapsed : %ld us (or %f seconds)\n",elapsed,elapsed/1000000.0);
}
/**
* @brief monitoring function (print difference :: First-Second)
*/
void print_difference_elapsed(Timer* inTimer1,Timer*inTimer2){
long int diff = get_elapsed(inTimer1)-get_elapsed(inTimer2);
printf("Timer Difference : %ld us (or %f seconds)\n",diff,(double)diff/1000000.0);
}
/**
* @brief Do everything
......@@ -253,7 +206,7 @@ int main(int argc, char ** av){
//Set timers
Timer interface_timer,ref_timer;
int ite=0, max_ite=5;
int ite=0, max_ite=1;
while(ite<max_ite){
//Execute FMM
tic(&interface_timer);
......
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
//For timing monitoring
#include "Timers.h"
#include "../Src/CScalfmmApi.h"
int globalId = 0;
typedef struct cell_display{
int Id; //Unique Id
int morton; //Unique Id by level
int coord[3];//Number of boxes from the corner of the big domain
int lvl; //current lvl
} Cell;
typedef struct userData{
int nbP2M;
int nbM2M;
int nbM2L;
int nbL2L;
int nbL2P;
int nbP2PFull;
} UserData;
/**
* This file is an example of the user defined kernel API, with link
* to our Chebyshev Kernel
**/
/**
* @brief Wrapper to init internal ChebCell
*/
void* displ_init_cell(int level, long long morton_index, int* tree_position, double* spatial_position, void * KernelDatas){
printf("Level : %d \t morton : %lld \ntree_positions : %d %d %d,\tspatial_position : %e %e %e\n" ,
level,morton_index,
tree_position[0],tree_position[1],tree_position[2],
spatial_position[0],spatial_position[1],spatial_position[2]);
Cell * current_cell = malloc(sizeof(Cell));
current_cell->lvl = level;
current_cell->morton = morton_index;
current_cell->Id = globalId++;
//Store tree coordinate
current_cell->coord[0] = tree_position[0];
current_cell->coord[1] = tree_position[1];
current_cell->coord[2] = tree_position[2];
return current_cell;
}
/**
* @brief Wrapper to free internal ChebCell
*/
void displ_free_cell(void * inCell){
free((Cell * )inCell);
}
/**
* @brief Wrapper to FMM operators (refer to CScalfmmApi.h to get the
* detailed descriptions)
*/
void displ_p2m(void* cellData, FSize nbParticlesInLeaf, const FSize* particleIndexes, void* userData){
Cell * current_cell = cellData;
printf("P2M with %lld parts filling cell %d ::\n",nbParticlesInLeaf,current_cell->Id);
printf("[p2m]Cell is %d/%d, level is %d, coord are %d %d %d\n\n",
current_cell->morton,current_cell->Id,
current_cell->lvl,
current_cell->coord[0],current_cell->coord[1],current_cell->coord[2]);
UserData * user_data = userData;
user_data->nbP2M += 1;
}
void displ_m2m(int level, void* parentCell, int childPosition, void* childCell, void* userData){
Cell * parent_cell = parentCell;
Cell * child_cell = childCell;
printf("M2M with %d cell and %d cell, childPosition is %d ::\n",parent_cell->Id,child_cell->Id,childPosition);
printf("\t[m2m]Parent Cell is %d, level is %d, coord are %d %d %d, and child cell is %d, level is %d, coord are %d %d %d\n\n",
parent_cell->morton,
parent_cell->lvl,
parent_cell->coord[0],parent_cell->coord[1],parent_cell->coord[2],
child_cell->morton,
child_cell->lvl,
child_cell->coord[0],child_cell->coord[1],child_cell->coord[2]);
UserData * user_data = userData;
user_data->nbM2M += 1;
}
void displ_m2l_full(int level, void* targetCell, void* sourceCell[343], void* userData){
Cell * target_cell = targetCell;
printf("M2L at lvl %d filling target cell of Id %d and morton : %d ::\n",target_cell->lvl,target_cell->Id,target_cell->morton);
int idSource = 0;
int countSource = 0;
for(idSource = 0 ; idSource<343 ; ++idSource){
if(sourceCell[idSource]){
Cell * source_cell = sourceCell[idSource];
printf("\t[m2l]Target : %d, sourceCell[%d] : morton %d, with %d,%d,%d coordinates \n",target_cell->morton,
idSource,source_cell->morton,
source_cell->coord[0],source_cell->coord[1],source_cell->coord[2]);
countSource++;
}
}
printf("\tFinished M2L at lvl %d with filling %d cell reading %d sources\n\n",level,target_cell->morton,countSource);
UserData * user_data = userData;
user_data->nbM2L += 1;
}
void displ_l2l(int level, void* parentCell, int childPosition, void* childCell, void* userData){
Cell * parent_cell = parentCell;
Cell * child_cell = childCell;
printf("L2L with %d cell and %d cell, childPosition is %d ::\n",parent_cell->Id,child_cell->Id,childPosition);
printf("\t[l2l]Parent Cell is %d, level is %d, coord are %d %d %d, Child Cell is %d, level is %d, coord are %d %d %d\n\n",
parent_cell->morton,
parent_cell->lvl,
parent_cell->coord[0],parent_cell->coord[1],parent_cell->coord[2],
child_cell->morton,
child_cell->lvl,
child_cell->coord[0],child_cell->coord[1],child_cell->coord[2]);
UserData * user_data = userData;
user_data->nbL2L += 1;
}
void displ_l2p(void* leafCell, FSize nbParticles, const FSize* particleIndexes, void* userData){
Cell * current_cell = leafCell;
printf("L2P with %lld parts reading cell %d ::\n",nbParticles,current_cell->Id);
printf("\t [l2p]Cell is %d/%d, level is %d, coord are %d %d %d\n\n",
current_cell->morton,current_cell->Id,
current_cell->lvl,
current_cell->coord[0],current_cell->coord[1],current_cell->coord[2]);
UserData * user_data = userData;
user_data->nbL2P += 1;
}
void displ_p2pFull(FSize nbParticles, const FSize* particleIndexes,
const FSize * sourceParticleIndexes[27], FSize sourceNbPart[27],void* userData) {
printf("P2P, no cells involved, only leaves\n");
UserData * user_data = userData;
user_data->nbP2PFull+= 1;
}
void displ_resetCell(int level, long long morton_index, int* tree_position, double* spatial_position, void * userCell){
printf("Removing %d cell at lvl %d... \n",((Cell*)userCell)->Id,((Cell*)userCell)->lvl);
}
/**
* @brief Do everything
* @param number of particle (no default value)
*/
int main(int argc, char ** av){
omp_set_num_threads(1);
printf("Start\n");
if(argc<2){
printf("Use : %s nb_part (optionnal : TreeHeight) \nexiting\n",av[0]);
exit(0);
}
int nbPart= atoi(av[1]);
int treeHeight = 5 ;
if(argc>2){
int treeHeight = atoi(av[2]);
}
double boxWidth = 1.0;
double boxCenter[3];
boxCenter[0] = boxCenter[1] = boxCenter[2] = 0.0;
//Allocation of the positions and physical values
double* particleXYZ = malloc(sizeof(double)*3*nbPart);
double* physicalValues = malloc(sizeof(double)*nbPart);
{
printf("Creating Particles:\n");
FSize idxPart;
for(idxPart = 0 ; idxPart < nbPart ; ++idxPart){
particleXYZ[idxPart*3] = (random()/(double)(RAND_MAX))*boxWidth - boxWidth/2 + boxCenter[0];
particleXYZ[idxPart*3+1] = (random()/(double)(RAND_MAX))*boxWidth - boxWidth/2 + boxCenter[1];
particleXYZ[idxPart*3+2] = (random()/(double)(RAND_MAX))*boxWidth - boxWidth/2 + boxCenter[2];
physicalValues[idxPart] = 1.0;
}
}
{//This part will write generated particles to a file at ScalFMM
//format in order to verify numercal results
/* FILE * fd = fopen("input.fma","w"); */
/* fprintf(fd,"8\t 4\n %d\n %f\t %f\t %f\t %f\n",nbPart,boxWidth/2.0, boxCenter[0],boxCenter[1],boxCenter[2]); */
/* FSize idxPart; */
/* for(idxPart=0 ; idxPart<nbPart ; ++idxPart){ */
/* fprintf(fd,"%e\t %e\t %e\t %e \n", */
/* particleXYZ[idxPart*3], */
/* particleXYZ[idxPart*3+1], */
/* particleXYZ[idxPart*3+2], */
/* physicalValues[idxPart]); */
/* } */
/* fclose(fd); */
}
scalfmm_handle handle = scalfmm_init(user_defined_kernel,multi_thread);
//Struct for user defined kernel
struct User_Scalfmm_Cell_Descriptor cellDescriptor;
cellDescriptor.user_init_cell = displ_init_cell;
cellDescriptor.user_free_cell = displ_free_cell;
// Init tree and cell
printf("Building the tree and Initizalizing the cells:\n");
scalfmm_build_tree(handle,treeHeight, boxWidth, boxCenter, cellDescriptor);
//Once is the tree built, one must set the kernel before inserting particles
//Set our callbacks
struct User_Scalfmm_Kernel_Descriptor kernel;
kernel.p2m = displ_p2m;
kernel.m2m = displ_m2m;
//init the other to NULL
kernel.m2l = NULL;
kernel.m2l_full = displ_m2l_full;
kernel.l2l = displ_l2l;
kernel.l2p = displ_l2p;
kernel.p2p_full = displ_p2pFull;
kernel.p2pinner = NULL;
kernel.p2p = NULL;
//Set my datas
UserData userDatas;
userDatas.nbP2M = 0;
userDatas.nbM2M = 0;
userDatas.nbM2L = 0;
userDatas.nbL2L = 0;
userDatas.nbL2P = 0;
userDatas.nbP2PFull = 0;
//Give ScalFMM the datas before calling fmm (this will set as well the kernel)
scalfmm_user_kernel_config(handle,kernel,&userDatas);
// Insert particles
printf("Inserting particles...\n");
scalfmm_tree_insert_particles_xyz(handle, nbPart, particleXYZ,BOTH);
//Set timers
Timer interface_timer,ref_timer;
int ite=0, max_ite=1;
while(ite<max_ite){
//Execute FMM
printf("\n\tStart FMM \n \n");
tic(&interface_timer);
scalfmm_execute_fmm(handle/*, kernel, &my_data*/);
tac(&interface_timer);
printf("User defined Chebyshev done\n");
print_elapsed(&interface_timer);
printf("FMM finished: \n\tNumber of P2M : %d\n\tNumber of M2M : %d\n\tNumber of M2L : %d\n\tNumber of L2L : %d\n\tNumber of L2P : %d\n\tNumber of P2P : %d\n",
userDatas.nbP2M,
userDatas.nbM2M,
userDatas.nbM2L,
userDatas.nbL2L,
userDatas.nbL2P,
userDatas.nbP2PFull);
scalfmm_reset_tree(handle,displ_resetCell);
ite++;
}
printf("Free the kernels\n");
printf("Free the Handles ...\n");
scalfmm_dealloc_handle(handle,displ_free_cell);
free(particleXYZ);
return EXIT_SUCCESS;
}
This diff is collapsed.
......@@ -113,9 +113,9 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
IF (APPLE)
set(SSE_FLAGS "-msse4 -mfpmath=sse") # -mtune=native -march=native
else(APPLE)
set(AVX_FLAGS "-mavx")
set(AVX2_FLAGS "-mavx2")
set(SSE_FLAGS "-axSSE4.2")
set(AVX_FLAGS "-mavx -march=native")
set(AVX2_FLAGS "-mavx2 -march=native")
set(SSE_FLAGS "-axSSE4.2 -march=native")
endif(APPLE)
#-Wshadow -Wpointer-arith -Wcast-qual -Wconversion -Wall -Wnosign-conversion ")
else()
......@@ -179,15 +179,13 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
##############################################################################
# Release flags #
##############################################################################
set(CMAKE_BUILD_TYPE Release)
# force -O3 in release
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
string(REPLACE "-O2" "-O3" CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE} )
set(CMAKE_CXX_FLAGS_TYPE "${CMAKE_CXX_FLAGS_RELEASE}")
# if compiler is intel add -ip
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(SCALFMM_CXX_FLAGS "${SCALFMM_CXX_FLAGS} -ip")
# set(SCALFMM_CXX_FLAGS "${SCALFMM_CXX_FLAGS} -ip -no-prec-div -no-prec-sqrt")
set(SCALFMM_CXX_FLAGS "${SCALFMM_CXX_FLAGS} -ip -no-prec-div -no-prec-sqrt")
endif()
# Compile optimization
if(APPLE)
......@@ -195,11 +193,11 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
"Set your optimization flags for release mode.")
else(APPLE)
# Not apple system - Check the compiler flags
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
# set(SCALFMM_FLAGS_OPTI_RELEASE "-fp-model precise -fp-model source -fimf-precision=low -funroll-loops -ftree-vectorize"
# CACHE STRING "Set your optimization flags for release mode.")
set(SCALFMM_FLAGS_OPTI_RELEASE " " CACHE STRING
"Set your optimization flags for release mode.")
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
set(SCALFMM_FLAGS_OPTI_RELEASE "-fp-model precise -fp-model source -fimf-precision=low -funroll-loops -ftree-vectorize"
CACHE STRING "Set your optimization flags for release mode.")
# set(SCALFMM_FLAGS_OPTI_RELEASE "-funroll-loops -ftree-vectorize" CACHE STRING
# "Set your optimization flags for release mode.")
else()
set(SCALFMM_FLAGS_OPTI_RELEASE "-ffast-math -funroll-loops -ftree-vectorize" CACHE STRING
"Set your optimization flags for release mode.")
......@@ -208,6 +206,9 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
#
set(SCALFMM_CXX_FLAGS "${SCALFMM_CXX_FLAGS} ${SCALFMM_FLAGS_OPTI_RELEASE}")
endif()
MESSAGE(STATUS " %%%%%%%%%% SCALFMM_CXX_FLAGS ${SCALFMM_CXX_FLAGS} %%%%%%%%%%%%%")
MESSAGE(STATUS " %%%%%%%%%% CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE} %%%%%%%%%%%%%")
#
##############################################################################
# Attach source code to exec #
......@@ -537,15 +538,20 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
${SCALFMM_CMAKE_MODULE_PATH}/compileTestAvx.cpp
COMPILE_DEFINITIONS "${CMAKE_CXX_FLAGS} ${AVX_FLAGS}"
OUTPUT_VARIABLE COMPILE_AVX_OUTPUT)
if(${COMPILE_AVX})
if(${COMPILE_AVX})
message(STATUS "%%%%%%%%%%%% COMPILE_AVX = ${COMPILE_AVX} %%%%< ${AVX_FLAGS}")
set(SCALFMM_CXX_FLAGS "${SCALFMM_CXX_FLAGS} ${AVX_FLAGS}")
#set( SCALFMM_USE_SSE OFF FORCE) # ne marche pas
message(STATUS "%%%%%%%%%%%% SCALFMM_CXX_FLAGS = ${SCALFMM_CXX_FLAGS}")
#set( SCALFMM_USE_SSE OFF FORCE) # ne marche pas
try_compile(COMPILE_RESULT_AVSPE ${CMAKE_CURRENT_BINARY_DIR}
${SCALFMM_CMAKE_MODULE_PATH}/checkAVXpe.cpp
COMPILE_DEFINITIONS "${CMAKE_CXX_FLAGS} ${AVX_FLAGS}")
if( NOT ${COMPILE_RESULT_AVSPE})
if( NOT ${COMPILE_RESULT_AVSPE})
set(__AVXPE_INTEL_COMPILER ON)
endif()
endif()
message(STATUS ${CMAKE_CXX_FLAGS} )
else(${COMPILE_AVX})
......@@ -770,7 +776,7 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
#set(CMAKE_CXX_FLAGS "${SCALFMM_CXX_FLAGS} ${CMAKE_CXX_FLAGS}")
message(STATUS "SCALFMM_USE_BLAS = ${SCALFMM_USE_BLAS}")
message(STATUS "SCALFMM_USE_FFT = ${SCALFMM_USE_FFT}")
message(STATUS "SCALFMM_USE_MKL = TODO")
message(STATUS "SCALFMM_USE_MKL = ${SCALFMM_USE_MKL}")
#
message(STATUS "CMAKE_CXX_FLAGS = ${CMAKE_CXX_FLAGS}")
message(STATUS "SCALFMM_CXX_FLAGS = ${SCALFMM_CXX_FLAGS}")
......
......@@ -150,7 +150,8 @@ int main(int argc, char* argv[])
//
KernelClass kernels(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(),&MatrixKernel);
//
FmmClass algo(&tree, &kernels);
// false : dynamic schedule.
FmmClass algo(&tree, &kernels,true);
//
algo.execute(); // Here the call of the FMM algorithm
//
......
......@@ -40,7 +40,7 @@
* Of course this class does not deallocate pointer given in arguements.
*/
template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass>
class FFmmAlgorithmSectionTask : public FAbstractAlgorithm{
class FFmmAlgorithmSectionTask : public FAbstractAlgorithm, public FAlgorithmTimers {
OctreeClass* const tree; //< The octree to work on
KernelClass** kernels; //< The kernels
......@@ -99,24 +99,38 @@ protected:
{
#pragma omp section
{
Timers[P2MTimer].tic();
if(operationsToProceed & FFmmP2M) bottomPass();
Timers[P2MTimer].tac();
Timers[M2MTimer].tic();
if(operationsToProceed & FFmmM2M) upwardPass();
Timers[M2MTimer].tac();
Timers[M2LTimer].tic();
if(operationsToProceed & FFmmM2L) transferPass();
Timers[M2LTimer].tac();
Timers[L2LTimer].tic();
if(operationsToProceed & FFmmL2L) downardPass();
Timers[L2LTimer].tac();
}
#pragma omp section
{
if(operationsToProceed & FFmmP2P) directPass();
Timers[P2PTimer].tic();
if( operationsToProceed & FFmmP2P ) directPass();
Timers[P2PTimer].tac();
}
}
#pragma omp single
{
Timers[L2PTimer].tic();
if(operationsToProceed & FFmmL2P) L2PPass();
Timers[L2PTimer].tac();
}
}
Timers[NearTimer] = Timers[L2PTimer] + Timers[P2PTimer];
}
/////////////////////////////////////////////////////////////////////////////
......@@ -321,7 +335,7 @@ protected:
FLOG( computationCounter.tic() );
for( int idxShape = 0 ; idxShape < SizeShape ; ++idxShape){
const int nbLeaf = shapes[idxShape].getSize();
const FSize nbLeaf = shapes[idxShape].getSize();
for(int iterLeaf = 0 ; iterLeaf < nbLeaf ; ++iterLeaf ){
typename OctreeClass::Iterator toWork = shapes[idxShape][iterLeaf];
#pragma omp task firstprivate(neighbors, toWork)
......
......@@ -80,12 +80,12 @@ public:
* The constructor needs the octree and the kernels used for computation.
* \param inTree the octree to work on.
* \param inKernels the kernels to call.
* \param inStaticSchedule Whether to use static or dynamic OpenMP scheduling.
* \param inStaticSchedule Whether to use static or dynamic OpenMP scheduling. default is dynamic.
*
* \except An exception is thrown if one of the arguments is NULL.
*/
FFmmAlgorithmThread(OctreeClass* const inTree, KernelClass* const inKernels,
const bool inStaticSchedule = true, const int inLeafLevelSeperationCriteria = 1)
const bool inStaticSchedule = false, const int inLeafLevelSeperationCriteria = 1)
: tree(inTree) , kernels(nullptr), iterArray(nullptr), leafsNumber(0),
MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()),
staticSchedule(inStaticSchedule), leafLevelSeperationCriteria(inLeafLevelSeperationCriteria) {
......
......@@ -60,6 +60,27 @@ public:
tic();
}
FTic(const FTic& other) : start(other.start), end(other.end), cumulate(other.cumulate) {}
FTic(FTic&& other) : start(other.start), end(other.end), cumulate(other.cumulate) {}
FTic& operator=(const FTic& other) {
start = other.start;
end = other.end;
cumulate = other.cumulate;
return *this;
}
const FTic operator+(const FTic& other) const {
FTic res(*this);
res.start = start < other.start ? start : other.start;
res.end = end > other.end ? end : other.end;
res.cumulate += other.cumulate;
return res;
}
/** Tic : start <= current time */
void tic(){
this->start = FTic::GetTime();
......