Commit d89a32ac authored by Quentin Khan's avatar Quentin Khan
Browse files
parents 85c2ced6 faa790fc
......@@ -1016,6 +1016,27 @@ extern "C" int scalfmm_get_nb_timers(scalfmm_handle Handle){
// ((ScalFmmCoreHandle<double> * ) Handle)->engine->tree_abstract_insert(NbPartToInsert,nbAttributeToInsert,strideForEachAtt,rawDatas);
// }
/**
* These functions are just translating functions.
*/
/**
 * Decode a child index into one direction per axis, relative to its parent.
 *
 * The index is read as three base-2 digits. A digit equal to 1 yields +1 and a
 * digit equal to 0 yields -1. The least significant digit is stored in
 * childFullPosition[2], the next one in [1] and the third one in [0].
 *
 * @param childPosition     Child index to decode.
 * @param childFullPosition Output array; elements [0], [1] and [2] are written.
 */
extern "C" void scalfmm_utils_parentChildPosition(int childPosition, int* childFullPosition){
    int remaining = childPosition;
    // Peel off one binary digit per axis, starting with the last axis.
    for(int axis = 2 ; axis >= 0 ; --axis){
        const bool digitSet = (remaining % 2) != 0;
        childFullPosition[axis] = digitSet ? 1 : -1;
        remaining /= 2;
    }
}
/**
 * Decode an interaction index into a per-axis offset of the source cell
 * relative to the target cell.
 *
 * The index is read as three base-7 digits; each digit is shifted by -3 so the
 * stored values lie in [-3;3]. The least significant digit is stored in
 * srcPosition[2], the next one in [1] and the third one in [0].
 *
 * @param interactionPosition Interaction index to decode.
 * @param srcPosition         Output array; elements [0], [1] and [2] are written.
 */
extern "C" void scalfmm_utils_interactionPosition(int interactionPosition, int* srcPosition){
    int remaining = interactionPosition;
    // Extract one base-7 digit per axis, starting with the last axis.
    for(int axis = 2 ; axis >= 0 ; --axis){
        srcPosition[axis] = (remaining % 7) - 3;
        remaining /= 7;
    }
}
/**
 * Forward a tree-reset request to the engine reachable through the handle.
 *
 * @param Handle      Library handle; it is cast to ScalFmmCoreHandle<double>*.
 * @param cellReseter Callback handed unchanged to the engine's reset_tree().
 */
extern "C" void scalfmm_reset_tree(scalfmm_handle Handle, Callback_reset_cell cellReseter){
    ScalFmmCoreHandle<double>* const coreHandle = (ScalFmmCoreHandle<double> * ) Handle;
    coreHandle->engine->reset_tree(cellReseter);
}
......
......@@ -28,7 +28,7 @@ int main(int argc, char ** av){
double boxCenter[3] = {0.0,0.0,0.0};
//Init our lib
scalfmm_handle handle = scalfmm_init(/* TreeHeight,boxWidth,boxCenter, */myChoice); //The tree is built
scalfmm_handle handle = scalfmm_init(/* TreeHeight,boxWidth,boxCenter, */myChoice,multi_thread); //The tree is built
struct User_Scalfmm_Cell_Descriptor user_descr;
user_descr.user_init_cell = NULL;
user_descr.user_free_cell = NULL;
......@@ -54,10 +54,10 @@ int main(int argc, char ** av){
}
//Inserting the array in the tree
scalfmm_tree_insert_particles_xyz(handle,nb_of_parts,positionsXYZ);
scalfmm_tree_insert_particles_xyz(handle,nb_of_parts,positionsXYZ,BOTH);
//Set the charge
scalfmm_set_physical_values(handle,nb_of_parts,array_of_charge);
scalfmm_set_physical_values(handle,nb_of_parts,array_of_charge,BOTH);
//Computation Part
......@@ -80,9 +80,9 @@ int main(int argc, char ** av){
scalfmm_execute_fmm(handle);
//Get the resulting forces
scalfmm_get_forces_xyz(handle,nb_of_parts,array_of_forces);
scalfmm_get_forces_xyz(handle,nb_of_parts,array_of_forces,BOTH);
//Get the resulting potential
scalfmm_get_potentials(handle,nb_of_parts,array_of_pot);
scalfmm_get_potentials(handle,nb_of_parts,array_of_pot,BOTH);
......@@ -90,7 +90,7 @@ int main(int argc, char ** av){
compute_displacement_from_forces(array_of_forces,array_of_charge,array_of_displacement,nb_of_parts);
//get position in order to display
scalfmm_get_positions_xyz(handle,nb_of_parts,positionsXYZ);
scalfmm_get_positions_xyz(handle,nb_of_parts,positionsXYZ,BOTH);
//Display forces :
{
......@@ -109,7 +109,7 @@ int main(int argc, char ** av){
}
}
//Apply displacement computed
scalfmm_add_to_positions_xyz(handle,nb_of_parts,array_of_displacement);
scalfmm_add_to_positions_xyz(handle,nb_of_parts,array_of_displacement,BOTH);
//Update Consequently the tree
scalfmm_update_tree(handle);
......
......@@ -47,7 +47,7 @@ struct MyCellDescriptor{
};
// This is our function that init a cell (struct MyCellDescriptor)
void* my_Callback_init_cell(int level, long long mortonIndex, int* coord, double* position){
void* my_Callback_init_cell(int level, long long mortonIndex, int* coord, double* position, void * kernel){
VerbosePrint("\tAllocating cell for level %d, morton index %lld, coord %d/%d/%d\n", level, mortonIndex, coord[0], coord[1], coord[2]);
struct MyCellDescriptor* cellData = (struct MyCellDescriptor*)malloc(sizeof(struct MyCellDescriptor));
memset(cellData, 0, sizeof(struct MyCellDescriptor));
......@@ -88,7 +88,7 @@ struct MyData {
// Our P2M
void my_Callback_P2M(void* cellData, FSize nbParticlesInLeaf, const int* particleIndexes, void* userData){
void my_Callback_P2M(void* cellData, FSize nbParticlesInLeaf, const FSize* particleIndexes, void* userData){
struct MyData* my_data = (struct MyData*)userData;
my_data->countP2M += 1;
......@@ -152,7 +152,7 @@ void my_Callback_L2L(int level, void* cellData, int childPosition, void* childDa
// JUST-PUT-HERE: Your L2L
}
void my_Callback_L2P(void* cellData, FSize nbParticlesInLeaf, const int* particleIndexes, void* userData){
void my_Callback_L2P(void* cellData, FSize nbParticlesInLeaf, const FSize* particleIndexes, void* userData){
struct MyData* my_data = (struct MyData*)userData;
my_data->countL2P += 1;
......@@ -167,7 +167,7 @@ void my_Callback_L2P(void* cellData, FSize nbParticlesInLeaf, const int* particl
}
}
void my_Callback_P2P(FSize nbParticlesInLeaf, const int* particleIndexes, FSize nbParticlesInSrc, const int* particleIndexesSrc, void* userData){
void my_Callback_P2P(FSize nbParticlesInLeaf, const FSize* particleIndexes, FSize nbParticlesInSrc, const FSize* particleIndexesSrc, void* userData){
struct MyData* my_data = (struct MyData*)userData;
my_data->countP2P += 1;
......@@ -189,7 +189,7 @@ void my_Callback_P2P(FSize nbParticlesInLeaf, const int* particleIndexes, FSize
// interacting with the target particles
}
void my_Callback_P2PInner(FSize nbParticlesInLeaf, const int* particleIndexes, void* userData){
void my_Callback_P2PInner(FSize nbParticlesInLeaf, const FSize* particleIndexes, void* userData){
struct MyData* my_data = (struct MyData*)userData;
my_data->countP2PInner += 1;
......@@ -233,7 +233,7 @@ int main(int argc, char ** argv){
}
// Init the handle
scalfmm_handle handle = scalfmm_init(user_defined_kernel);
scalfmm_handle handle = scalfmm_init(user_defined_kernel,multi_thread);
//Build our own call backs
struct User_Scalfmm_Cell_Descriptor cellDescriptor;
......@@ -245,7 +245,7 @@ int main(int argc, char ** argv){
scalfmm_build_tree(handle,treeHeight, boxWidth, boxCenter, cellDescriptor);
// Insert particles
printf("Inserting particles...\n");
scalfmm_tree_insert_particles_xyz(handle, nbParticles, particleXYZ);
scalfmm_tree_insert_particles_xyz(handle, nbParticles, particleXYZ,BOTH);
printf("Particles Inserted ...\n");
// Init our callback struct
......@@ -253,10 +253,12 @@ int main(int argc, char ** argv){
kernel.p2m = my_Callback_P2M;
kernel.m2m = my_Callback_M2M;
kernel.m2l = my_Callback_M2L;
kernel.m2l_full = NULL;
kernel.l2l = my_Callback_L2L;
kernel.l2p = my_Callback_L2P;
kernel.p2pinner = my_Callback_P2PInner;
kernel.p2p = my_Callback_P2P;
kernel.p2p_full = NULL;
// Init the data to pass to all our callbacks
struct MyData my_data;
......@@ -277,7 +279,7 @@ int main(int argc, char ** argv){
// Execute the FMM
scalfmm_execute_fmm(handle/*, kernel, &my_data*/);
printf("FMM finished ... \n");
scalfmm_get_positions_xyz(handle,nbParticles,new_positions);
scalfmm_get_positions_xyz(handle,nbParticles,new_positions,BOTH);
//Computation on those positions
//Here it's a random
int id;
......@@ -288,7 +290,7 @@ int main(int argc, char ** argv){
}
printf("Positions changed \n");
scalfmm_set_positions_xyz(handle,nbParticles,new_positions);
scalfmm_set_positions_xyz(handle,nbParticles,new_positions,BOTH);
scalfmm_update_tree(handle);
curr_ite++;
}
......
......@@ -248,10 +248,10 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
set(SCALFMM_INCLUDES "${SCALFMM_INCLUDES}; ${MPI_CXX_INCLUDE_PATH}")
endif()
if (MPI_CXX_LINK_FLAGS)
set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES}; ${MPI_CXX_LINK_FLAGS}")
set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${MPI_CXX_LINK_FLAGS}")
endif()
if (MPI_CXX_LIBRARIES)
set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES}; ${MPI_CXX_LIBRARIES}")
set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${MPI_CXX_LIBRARIES}")
endif()
endif()
......@@ -292,7 +292,7 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
endif()
if(BLAS_FOUND)
set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES}; ${BLASLAPACK_LIBRARIES}")
set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${BLASLAPACK_LIBRARIES}")
#message(STATUS "SCALFMM_LIBRARIES = ${SCALFMM_LIBRARIES}")
else()
message(WARNING "BLAS has not been found, SCALFMM will continue to compile but some applications will be disabled.")
......@@ -335,12 +335,12 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
endif()
if (FFTW_LIBRARIES_DEP)
foreach (fft_lib ${FFTW_LIBRARIES_DEP})
set(FFT_LIBRARIES "${FFT_LIBRARIES} ${fft_lib};")
set(FFT_LIBRARIES "${FFT_LIBRARIES};${fft_lib};")
endforeach()
endif()
set(FFT_INCLUDES "${FFTW_INCLUDE_DIRS_DEP}" )
if (FFT_LIBRARIES)
set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES}; ${FFT_LIBRARIES}")
set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${FFT_LIBRARIES}")
endif()
endif(SCALFMM_USE_MKL_AS_BLAS)
......@@ -357,12 +357,12 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
endif()
if (FFTW_LIBRARIES_DEP)
foreach (fft_lib ${FFTW_LIBRARIES_DEP})
set(FFT_LIBRARIES "${FFT_LIBRARIES} ${fft_lib};")
set(FFT_LIBRARIES "${FFT_LIBRARIES};${fft_lib};")
endforeach()
endif()
set(FFT_INCLUDES "${FFTW_INCLUDE_DIRS_DEP}" )
if (FFT_LIBRARIES)
set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES}; ${FFT_LIBRARIES}")
set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${FFT_LIBRARIES}")
endif()
endif(SCALFMM_USE_MKL_AS_FFTW)
......@@ -434,7 +434,7 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
include_directories(${CUDA_INCLUDE_DIRS})
endif()
if (CUDA_LIBRARIES)
set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES}; ${CUDA_LIBRARIES}")
set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${CUDA_LIBRARIES}")
endif()
set(CUDA_NEEDED_INCLUDE_DIRS ${CMAKE_BINARY_DIR}/Src)
......@@ -458,28 +458,29 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
include_directories(${STARPU_INCLUDE_DIRS_DEP})
foreach (starpu_libdir ${STARPU_LIBRARY_DIRS_DEP})
if (${starpu_libdir} MATCHES "^ *-L")
set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES}; ${starpu_libdir}")
set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${starpu_libdir}")
else()
set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES}; -L${starpu_libdir}")
set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};-L${starpu_libdir}")
endif()
endforeach()
foreach (starpu_lib ${STARPU_LIBRARIES_DEP})
if (EXISTS ${starpu_lib} OR ${starpu_lib} MATCHES "^ *-")
set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES}; ${starpu_lib}")
set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${starpu_lib}")
else()
set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES}; -l${starpu_lib}")
set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};-l${starpu_lib}")
endif()
endforeach()
# TODO: is this very useful? CUDA is already a component of find starpu
if (CUDA_LIBRARIES)
set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES}; ${CUDA_LIBRARIES}")
set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${CUDA_LIBRARIES}")
endif()
# Message
message(STATUS " STARPU_LIBRARIES = ${STARPU_LIBRARIES}")
if (STARPU_INCLUDE_DIRS)
message(STATUS " STARPU_INCLUDES = ${STARPU_INCLUDE_DIRS}")
set(SCALFMM_INCLUDES "${SCALFMM_INCLUDES}; ${STARPU_INCLUDE_DIRS}")
endif()
# TODO: replace this by a component of find starpu
......@@ -487,7 +488,7 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
MESSAGE( STATUS "SCALFMM_USE_OPENCL = ${SCALFMM_USE_OPENCL}" )
if(SCALFMM_USE_OPENCL)
include_directories($ENV{OPENCL_INC})
SET(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES}; -L$ENV{OPENCL_LIB}; -lOpenCL")
SET(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};-L$ENV{OPENCL_LIB};-lOpenCL")
endif()
endif(SCALFMM_USE_STARPU)
list(APPEND FUSE_LIST "STARPU")
......@@ -631,8 +632,12 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/")
#
set(SCALFMM_COMPILE_FLAGS "${SCALFMM_CXX_FLAGS} ${CMAKE_CXX_FLAGS_TYPE} ${CMAKE_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS "${SCALFMM_COMPILE_FLAGS}")
# set(CMAKE_CXX_FLAGS "${SCALFMM_COMPILE_FLAGS} ${CMAKE_CXX_FLAGS_TYPE} ${CMAKE_CXX_FLAGS}")
set(SCALFMM_COMPILE_LIBS "${SCALFMM_LIBRARIES}")
# We need the libraries without spaces (inside the config file)
set(SCALFMM_COMPILE_LIBS "")
foreach(lib_var ${SCALFMM_LIBRARIES})
string(STRIP ${lib_var} lib_var)
LIST(APPEND SCALFMM_COMPILE_LIBS ${lib_var})
endforeach()
configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/Src/ScalFmmConfig.h.cmake
${CMAKE_BINARY_DIR}/Src/ScalFmmConfig.h )
#
......
......@@ -150,8 +150,9 @@ int main(int argc, char* argv[])
//
KernelClass kernels(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(),&MatrixKernel);
//
// false : dynamic schedule.
FmmClass algo(&tree, &kernels,true);
// false : dynamic schedule.
int inUserChunckSize = 10; // To specify the chunck size in the loops (-1 is static, 0 is N/p^2, otherwise i)
FmmClass algo(&tree, &kernels, inUserChunckSize);
//
algo.execute(); // Here the call of the FMM algorithm
//
......
......@@ -11,13 +11,13 @@ version 1.1
-----
- The floating-point type is now chosen through a template parameter (passed to all classes) and no longer at configure time
- There are no more ScalFMM variables and defines, only SCALFMM (all letters in upper case)
- BLAS and FFTW are turned on by default, and the compilation continues even if they are not found
- We started using unique_ptr (and advise using it) in the main files
- Add interpolation FMM based on uniform grid points.
- Add blocked version of the algorithm to increase the granularity (task-based approach)
- Fix bug with Chebyshev kernel and multi RHS.
- Improvements in unit tests (work with Linux and Mac OS X for Intel and GCC)
-
- Use MORSE distribution to detect BLAS, FFTW, ....
- BLAS and FFTW are turned on by default, and the compilation continues even if they are not found
1.2.1
-----
......
......@@ -15,7 +15,7 @@ SET(SCALFMM_PREFIX "@CMAKE_INSTALL_PREFIX@")
SET(SCALFMM_INCLUDES_DIR "@CMAKE_INSTALL_PREFIX@/include")
SET(SCALFMM_INCLUDES_ADD "@SCALFMM_INCLUDES@")
SET(SCALFMM_LIBRARIES_DIR "@CMAKE_INSTALL_PREFIX@/lib")
SET(SCALFMM_LIBRARIES_ADD "@SCALFMM_LIBRARIES@")
SET(SCALFMM_LIBRARIES_ADD "@SCALFMM_COMPILE_LIBS@")
SET(SCALFMM_LIBRARY_NAME "@scalfmm_lib@")
SET(ScaLFMM_CXX_FLAGS "@SCALFMM_CXX_FLAGS@")
#
......
......@@ -436,7 +436,7 @@ private:
// truncated singular value decomposition of matrix
const unsigned int info = FBlas::gesvd(nnodes, nnodes, U, S, VT, nnodes, LWORK, WORK);
if (info!=0) throw std::runtime_error("SVD did not converge with " + info);
if (info!=0) throw std::runtime_error("SVD did not converge with " + std::to_string(info));
const unsigned int rank = this->getRank(S, Epsilon);
// store
......
// ===================================================================================
// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Bérenger Bramas, Matthias Messner
// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Berenger Bramas, Matthias Messner
// olivier.coulaud@inria.fr, berenger.bramas@inria.fr
// This software is a computer program whose purpose is to compute the FMM.
//
......@@ -300,7 +300,7 @@ protected:
} while(octreeIterator.moveRight());
FLOG( FLog::Controller << "\tFinished (@Direct Pass (L2P + P2P) = " << counterTime.tacAndElapsed() << "s)\n" );
FLOG( FLog::Controller << "\tFinished (@Direct Pass (L2P + P2P) = " << counterTime.tacAndElapsed() << " s)\n" );
FLOG( FLog::Controller << "\t\t Computation L2P : " << computationCounterL2P.cumulated() << " s\n" );
FLOG( FLog::Controller << "\t\t Computation P2P : " << computationCounterP2P.cumulated() << " s\n" );
......
......@@ -60,28 +60,26 @@ class FFmmAlgorithmThread : public FAbstractAlgorithm, public FAlgorithmTimers{
const int OctreeHeight; ///< The height of the given tree.
const bool staticSchedule;
const int userChunkSize;
const int leafLevelSeperationCriteria;
unsigned int ompChunkSize = 0;
public:
/** Class constructor
*
* The constructor needs the octree and the kernels used for computation.
* \param inTree the octree to work on.
* \param inKernels the kernels to call.
* \param inStaticSchedule Whether to use static or dynamic OpenMP scheduling. default is dynamic.
* \param inUserChunkSize Chunk size used in the loops (-1 is static, 0 is N/p^2, otherwise it is
* used directly as the number of items to process together); default is 10.
*
* \except An exception is thrown if one of the arguments is NULL.
*/
FFmmAlgorithmThread(OctreeClass* const inTree, KernelClass* const inKernels,
const bool inStaticSchedule = false, const int inLeafLevelSeperationCriteria = 1)
const int inUserChunkSize = 10, const int inLeafLevelSeperationCriteria = 1)
: tree(inTree) , kernels(nullptr), iterArray(nullptr), leafsNumber(0),
MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()),
staticSchedule(inStaticSchedule), leafLevelSeperationCriteria(inLeafLevelSeperationCriteria) {
userChunckSize(inUserChunckSize), leafLevelSeperationCriteria(inLeafLevelSeperationCriteria) {
FAssertLF(tree, "tree cannot be null");
this->kernels = new KernelClass*[MaxThreads];
......@@ -96,7 +94,7 @@ public:
FAbstractAlgorithm::setNbLevelsInTree(tree->getHeight());
FLOG(FLog::Controller << "FFmmAlgorithmThread (Max Thread " << omp_get_max_threads() << ")\n");
FLOG(FLog::Controller << "\t static schedule " << (staticSchedule?"TRUE":"FALSE") << ")\n");
FLOG(FLog::Controller << "\t static schedule " << (userChunckSize == -1?"static":(userChunckSize == 0?"N/p^2":userChunckSize)) << ")\n");
}
/** Default destructor */
......@@ -107,26 +105,20 @@ public:
delete [] this->kernels;
}
template <class NumType>
NumType getChunkSize(const NumType inSize) const {
if(staticSchedule){
if(userChunkSize <= -1){
return FMath::Max(NumType(1) , NumType(double(inSize)/double(omp_get_max_threads())) );
} else if(userChunkSize == 0){
return FMath::Max(NumType(1) , inSize/NumType(omp_get_max_threads()*omp_get_max_threads()));
} else {
return userChunkSize;
}
else{
if (ompChunkSize > 0) {
return ompChunkSize;
}
else {
return FMath::Max(NumType(1) , inSize/NumType(omp_get_max_threads()*omp_get_max_threads()));
}
}
}
}
template <class NumType>
void setChunkSize(const NumType size) {
ompChunkSize = size;
userChunkSize = size;
}
protected:
......
......@@ -30,7 +30,7 @@ protected:
size_t v[9];
};
static void SetKernelArgs(cl_kernel& kernel, const int pos){
static void SetKernelArgs(cl_kernel& /*kernel*/, const int /*pos*/){
}
template <class ParamClass, class... Args>
static void SetKernelArgs(cl_kernel& kernel, const int pos, ParamClass* param, Args... args){
......
......@@ -45,8 +45,8 @@
* See documentation of FTreeLoader, FKernelLoader, FAlgoLoader.
*/
template <class TreeLoader,
template <typename TL> class KernelLoader,
template <typename TL, template <typename TL> class KL> class AlgoLoader>
template <typename TL_1> class KernelLoader,
template <typename TL_2, template <typename TL_3> class KL> class AlgoLoader>
void runperf(FPerfTestParams& params)
{
TreeLoader treeLoader(params);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment