diff --git a/Addons/CKernelApi/Src/FScalFMMEngine.hpp b/Addons/CKernelApi/Src/FScalFMMEngine.hpp index 363f1b66ebdbd1b34e40b401c322fc7d989ccb0b..2de26e11a0d548614c0bb6ca4c2ca2ba62687139 100644 --- a/Addons/CKernelApi/Src/FScalFMMEngine.hpp +++ b/Addons/CKernelApi/Src/FScalFMMEngine.hpp @@ -1016,6 +1016,27 @@ extern "C" int scalfmm_get_nb_timers(scalfmm_handle Handle){ // ((ScalFmmCoreHandle<double> * ) Handle)->engine->tree_abstract_insert(NbPartToInsert,nbAttributeToInsert,strideForEachAtt,rawDatas); // } +/** + * These functions only translate a linear index into a 3D relative position. + */ + +//< This function fills childFullPosition[3] with -1 or 1 per axis: entry 0 comes from bit 2 of childPosition, +//< entry 1 from bit 1 and entry 2 from bit 0, i.e. the position of a child relative to its parent +extern "C" void scalfmm_utils_parentChildPosition(int childPosition, int* childFullPosition){ + childFullPosition[2] = (childPosition%2 ? 1 : -1); + childFullPosition[1] = ((childPosition/2)%2 ? 1 : -1); + childFullPosition[0] = ((childPosition/4)%2 ? 1 : -1); +} + +//< This function fills srcPosition[3] with values in [-3;3]: interactionPosition is read as a 3-digit base-7 +//< index (entry 0 is the most significant digit), i.e. the position of an interaction cell relative to the target +extern "C" void scalfmm_utils_interactionPosition(int interactionPosition, int* srcPosition){ + srcPosition[2] = interactionPosition%7 - 3; + srcPosition[1] = (interactionPosition/7)%7 - 3; + srcPosition[0] = (interactionPosition/49)%7 - 3; +} + + extern "C" void scalfmm_reset_tree(scalfmm_handle Handle, Callback_reset_cell cellReseter){ ((ScalFmmCoreHandle<double> * ) Handle)->engine->reset_tree(cellReseter); } diff --git a/Addons/CKernelApi/Tests/testUseNewApi.c b/Addons/CKernelApi/Tests/testUseNewApi.c index 203da8dbc107ffe8739c59f11068e6b258d58f51..ce5ecca95eb32b27cdd95631efc22ec6b5819c4d 100644 --- a/Addons/CKernelApi/Tests/testUseNewApi.c +++ b/Addons/CKernelApi/Tests/testUseNewApi.c @@ -28,7 +28,7 @@ int main(int argc, char ** av){ double boxCenter[3] = {0.0,0.0,0.0}; //Init our lib - scalfmm_handle handle = scalfmm_init(/* 
TreeHeight,boxWidth,boxCenter, */myChoice); //The tree is built + scalfmm_handle handle = scalfmm_init(/* TreeHeight,boxWidth,boxCenter, */myChoice,multi_thread); //The tree is built struct User_Scalfmm_Cell_Descriptor user_descr; user_descr.user_init_cell = NULL; user_descr.user_free_cell = NULL; @@ -54,10 +54,10 @@ int main(int argc, char ** av){ } //Inserting the array in the tree - scalfmm_tree_insert_particles_xyz(handle,nb_of_parts,positionsXYZ); + scalfmm_tree_insert_particles_xyz(handle,nb_of_parts,positionsXYZ,BOTH); //Set the charge - scalfmm_set_physical_values(handle,nb_of_parts,array_of_charge); + scalfmm_set_physical_values(handle,nb_of_parts,array_of_charge,BOTH); //Computation Part @@ -80,9 +80,9 @@ int main(int argc, char ** av){ scalfmm_execute_fmm(handle); //Get the resulting forces - scalfmm_get_forces_xyz(handle,nb_of_parts,array_of_forces); + scalfmm_get_forces_xyz(handle,nb_of_parts,array_of_forces,BOTH); //Get the resulting potential - scalfmm_get_potentials(handle,nb_of_parts,array_of_pot); + scalfmm_get_potentials(handle,nb_of_parts,array_of_pot,BOTH); @@ -90,7 +90,7 @@ int main(int argc, char ** av){ compute_displacement_from_forces(array_of_forces,array_of_charge,array_of_displacement,nb_of_parts); //get position in order to display - scalfmm_get_positions_xyz(handle,nb_of_parts,positionsXYZ); + scalfmm_get_positions_xyz(handle,nb_of_parts,positionsXYZ,BOTH); //Display forces : { @@ -109,7 +109,7 @@ int main(int argc, char ** av){ } } //Apply displacement computed - scalfmm_add_to_positions_xyz(handle,nb_of_parts,array_of_displacement); + scalfmm_add_to_positions_xyz(handle,nb_of_parts,array_of_displacement,BOTH); //Update Consequently the tree scalfmm_update_tree(handle); diff --git a/Addons/CKernelApi/Tests/testUserDefinedKernelApi.c b/Addons/CKernelApi/Tests/testUserDefinedKernelApi.c index 6d51948b3aa4bc1b62c7ef631b5eb2b99d6724df..72dd9da8245f5d0b31991b80870777cf25f7abb5 100644 --- a/Addons/CKernelApi/Tests/testUserDefinedKernelApi.c 
+++ b/Addons/CKernelApi/Tests/testUserDefinedKernelApi.c @@ -47,7 +47,7 @@ struct MyCellDescriptor{ }; // This is our function that init a cell (struct MyCellDescriptor) -void* my_Callback_init_cell(int level, long long mortonIndex, int* coord, double* position){ +void* my_Callback_init_cell(int level, long long mortonIndex, int* coord, double* position, void * kernel){ VerbosePrint("\tAllocating cell for level %d, morton index %lld, coord %d/%d/%d\n", level, mortonIndex, coord[0], coord[1], coord[2]); struct MyCellDescriptor* cellData = (struct MyCellDescriptor*)malloc(sizeof(struct MyCellDescriptor)); memset(cellData, 0, sizeof(struct MyCellDescriptor)); @@ -88,7 +88,7 @@ struct MyData { // Our P2M -void my_Callback_P2M(void* cellData, FSize nbParticlesInLeaf, const int* particleIndexes, void* userData){ +void my_Callback_P2M(void* cellData, FSize nbParticlesInLeaf, const FSize* particleIndexes, void* userData){ struct MyData* my_data = (struct MyData*)userData; my_data->countP2M += 1; @@ -152,7 +152,7 @@ void my_Callback_L2L(int level, void* cellData, int childPosition, void* childDa // JUST-PUT-HERE: Your L2L } -void my_Callback_L2P(void* cellData, FSize nbParticlesInLeaf, const int* particleIndexes, void* userData){ +void my_Callback_L2P(void* cellData, FSize nbParticlesInLeaf, const FSize* particleIndexes, void* userData){ struct MyData* my_data = (struct MyData*)userData; my_data->countL2P += 1; @@ -167,7 +167,7 @@ void my_Callback_L2P(void* cellData, FSize nbParticlesInLeaf, const int* particl } } -void my_Callback_P2P(FSize nbParticlesInLeaf, const int* particleIndexes, FSize nbParticlesInSrc, const int* particleIndexesSrc, void* userData){ +void my_Callback_P2P(FSize nbParticlesInLeaf, const FSize* particleIndexes, FSize nbParticlesInSrc, const FSize* particleIndexesSrc, void* userData){ struct MyData* my_data = (struct MyData*)userData; my_data->countP2P += 1; @@ -189,7 +189,7 @@ void my_Callback_P2P(FSize nbParticlesInLeaf, const int* particleIndexes, 
FSize // interacting with the target particles } -void my_Callback_P2PInner(FSize nbParticlesInLeaf, const int* particleIndexes, void* userData){ +void my_Callback_P2PInner(FSize nbParticlesInLeaf, const FSize* particleIndexes, void* userData){ struct MyData* my_data = (struct MyData*)userData; my_data->countP2PInner += 1; @@ -233,7 +233,7 @@ int main(int argc, char ** argv){ } // Init the handle - scalfmm_handle handle = scalfmm_init(user_defined_kernel); + scalfmm_handle handle = scalfmm_init(user_defined_kernel,multi_thread); //Build our own call backs struct User_Scalfmm_Cell_Descriptor cellDescriptor; @@ -245,7 +245,7 @@ int main(int argc, char ** argv){ scalfmm_build_tree(handle,treeHeight, boxWidth, boxCenter, cellDescriptor); // Insert particles printf("Inserting particles...\n"); - scalfmm_tree_insert_particles_xyz(handle, nbParticles, particleXYZ); + scalfmm_tree_insert_particles_xyz(handle, nbParticles, particleXYZ,BOTH); printf("Particles Inserted ...\n"); // Init our callback struct @@ -253,10 +253,12 @@ int main(int argc, char ** argv){ kernel.p2m = my_Callback_P2M; kernel.m2m = my_Callback_M2M; kernel.m2l = my_Callback_M2L; + kernel.m2l_full = NULL; kernel.l2l = my_Callback_L2L; kernel.l2p = my_Callback_L2P; kernel.p2pinner = my_Callback_P2PInner; kernel.p2p = my_Callback_P2P; + kernel.p2p_full = NULL; // Init the data to pass to all our callbacks struct MyData my_data; @@ -277,7 +279,7 @@ int main(int argc, char ** argv){ // Execute the FMM scalfmm_execute_fmm(handle/*, kernel, &my_data*/); printf("FMM finished ... 
\n"); - scalfmm_get_positions_xyz(handle,nbParticles,new_positions); + scalfmm_get_positions_xyz(handle,nbParticles,new_positions,BOTH); //Computation on those positions //Here it's a random int id; @@ -288,7 +290,7 @@ int main(int argc, char ** argv){ } printf("Positions changed \n"); - scalfmm_set_positions_xyz(handle,nbParticles,new_positions); + scalfmm_set_positions_xyz(handle,nbParticles,new_positions,BOTH); scalfmm_update_tree(handle); curr_ite++; } diff --git a/CMakeLists.txt b/CMakeLists.txt index b5220b006ad9bd500054aae4d37e5580533fe45b..aa11f4ce94981cec24af88aca832fd84cd87a439 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -248,10 +248,10 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/") set(SCALFMM_INCLUDES "${SCALFMM_INCLUDES}; ${MPI_CXX_INCLUDE_PATH}") endif() if (MPI_CXX_LINK_FLAGS) - set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES}; ${MPI_CXX_LINK_FLAGS}") + set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${MPI_CXX_LINK_FLAGS}") endif() if (MPI_CXX_LIBRARIES) - set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES}; ${MPI_CXX_LIBRARIES}") + set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${MPI_CXX_LIBRARIES}") endif() endif() @@ -292,7 +292,7 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/") endif() if(BLAS_FOUND) - set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES}; ${BLASLAPACK_LIBRARIES}") + set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${BLASLAPACK_LIBRARIES}") #message(STATUS "SCALFMM_LIBRARIES = ${SCALFMM_LIBRARIES}") else() message(WARNING "BLAS has not been found, SCALFMM will continue to compile but some applications will be disabled.") @@ -335,12 +335,12 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/") endif() if (FFTW_LIBRARIES_DEP) foreach (fft_lib ${FFTW_LIBRARIES_DEP}) - set(FFT_LIBRARIES "${FFT_LIBRARIES} ${fft_lib};") + set(FFT_LIBRARIES "${FFT_LIBRARIES};${fft_lib};") endforeach() endif() set(FFT_INCLUDES "${FFTW_INCLUDE_DIRS_DEP}" ) if (FFT_LIBRARIES) - set(SCALFMM_LIBRARIES 
"${SCALFMM_LIBRARIES}; ${FFT_LIBRARIES}") + set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${FFT_LIBRARIES}") endif() endif(SCALFMM_USE_MKL_AS_BLAS) @@ -357,12 +357,12 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/") endif() if (FFTW_LIBRARIES_DEP) foreach (fft_lib ${FFTW_LIBRARIES_DEP}) - set(FFT_LIBRARIES "${FFT_LIBRARIES} ${fft_lib};") + set(FFT_LIBRARIES "${FFT_LIBRARIES};${fft_lib};") endforeach() endif() set(FFT_INCLUDES "${FFTW_INCLUDE_DIRS_DEP}" ) if (FFT_LIBRARIES) - set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES}; ${FFT_LIBRARIES}") + set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${FFT_LIBRARIES}") endif() endif(SCALFMM_USE_MKL_AS_FFTW) @@ -434,7 +434,7 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/") include_directories(${CUDA_INCLUDE_DIRS}) endif() if (CUDA_LIBRARIES) - set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES}; ${CUDA_LIBRARIES}") + set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${CUDA_LIBRARIES}") endif() set(CUDA_NEEDED_INCLUDE_DIRS ${CMAKE_BINARY_DIR}/Src) @@ -458,28 +458,29 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/") include_directories(${STARPU_INCLUDE_DIRS_DEP}) foreach (starpu_libdir ${STARPU_LIBRARY_DIRS_DEP}) if (${starpu_libdir} MATCHES "^ *-L") - set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES}; ${starpu_libdir}") + set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${starpu_libdir}") else() - set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES}; -L${starpu_libdir}") + set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};-L${starpu_libdir}") endif() endforeach() foreach (starpu_lib ${STARPU_LIBRARIES_DEP}) if (EXISTS ${starpu_lib} OR ${starpu_lib} MATCHES "^ *-") - set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES}; ${starpu_lib}") + set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${starpu_lib}") else() - set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES}; -l${starpu_lib}") + set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};-l${starpu_lib}") endif() endforeach() # TODO: is this very useful? 
CUDA is already a component of find starpu if (CUDA_LIBRARIES) - set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES}; ${CUDA_LIBRARIES}") + set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${CUDA_LIBRARIES}") endif() # Message message(STATUS " STARPU_LIBRARIES = ${STARPU_LIBRARIES}") if (STARPU_INCLUDE_DIRS) message(STATUS " STARPU_INCLUDES = ${STARPU_INCLUDE_DIRS}") + set(SCALFMM_INCLUDES "${SCALFMM_INCLUDES}; ${STARPU_INCLUDE_DIRS}") endif() # TODO: replace this by a component of find starpu @@ -487,7 +488,7 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/") MESSAGE( STATUS "SCALFMM_USE_OPENCL = ${SCALFMM_USE_OPENCL}" ) if(SCALFMM_USE_OPENCL) include_directories($ENV{OPENCL_INC}) - SET(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES}; -L$ENV{OPENCL_LIB}; -lOpenCL") + SET(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};-L$ENV{OPENCL_LIB};-lOpenCL") endif() endif(SCALFMM_USE_STARPU) list(APPEND FUSE_LIST "STARPU") @@ -631,8 +632,12 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/") # set(SCALFMM_COMPILE_FLAGS "${SCALFMM_CXX_FLAGS} ${CMAKE_CXX_FLAGS_TYPE} ${CMAKE_CXX_FLAGS}") set(CMAKE_CXX_FLAGS "${SCALFMM_COMPILE_FLAGS}") -# set(CMAKE_CXX_FLAGS "${SCALFMM_COMPILE_FLAGS} ${CMAKE_CXX_FLAGS_TYPE} ${CMAKE_CXX_FLAGS}") - set(SCALFMM_COMPILE_LIBS "${SCALFMM_LIBRARIES}") + # We need the libraries without spaces (inside the config file) + set(SCALFMM_COMPILE_LIBS "") + foreach(lib_var ${SCALFMM_LIBRARIES}) + string(STRIP ${lib_var} lib_var) + LIST(APPEND SCALFMM_COMPILE_LIBS ${lib_var}) + endforeach() configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/Src/ScalFmmConfig.h.cmake ${CMAKE_BINARY_DIR}/Src/ScalFmmConfig.h ) # diff --git a/Examples/ChebyshevInterpolationFMM.cpp b/Examples/ChebyshevInterpolationFMM.cpp index 25c1515f11e5e2bc2e06d0885d570fc297d738b3..beb004d74af80a4c869d00d7986dceb7436c12d7 100644 --- a/Examples/ChebyshevInterpolationFMM.cpp +++ b/Examples/ChebyshevInterpolationFMM.cpp @@ -150,8 +150,9 @@ int main(int argc, char* argv[]) // 
KernelClass kernels(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(),&MatrixKernel); // - // false : dynamic schedule. - FmmClass algo(&tree, &kernels,true); + // The last argument is now a chunk size, not a static/dynamic flag. + int inUserChunckSize = 10; // Chunk size used in the loops (-1 is static, 0 is N/p^2, otherwise the value itself) + FmmClass algo(&tree, &kernels, inUserChunckSize); // algo.execute(); // Here the call of the FMM algorithm // diff --git a/NEWS.txt b/NEWS.txt index d471a8f1fa5e8b3a22e860d398cc92f85bba3d75..7126bc6e3e5a6eb4a905e7d573a46c5bd1d0ba0b 100644 --- a/NEWS.txt +++ b/NEWS.txt @@ -11,13 +11,13 @@ version 1.1 ----- - The choice of floating point values is done by template (by passing template to all classes) and no more at configure time - There is no more ScalFMM variables and defines but only SCALFMM (all letters in upper case) -- BLAS and FFTW are turned on by default, and the compilation continue even if they are not found - We start to use unique ptr (and advise to use it) in the main files - Add interpolation FMM based on uniform grid points. - Add blocked version of the algorithm to increase the granularity (task-based approach) - Fix bug with Chebyschev kernel and multi RHS. - Improvements in unit tests (works with Linux and MacOs X for intel and gcc) -- +- Use MORSE distribution to detect BLAS, FFTW, .... 
+- BLAS and FFTW are turned on by default, and the compilation continue even if they are not found 1.2.1 ----- diff --git a/ScalFMMConfig.cmake.in b/ScalFMMConfig.cmake.in index 6a932a54da571ae45304582e564300b939de8bab..bc683cab353236228569d6264bb3f4c1c8e69c9d 100644 --- a/ScalFMMConfig.cmake.in +++ b/ScalFMMConfig.cmake.in @@ -15,7 +15,7 @@ SET(SCALFMM_PREFIX "@CMAKE_INSTALL_PREFIX@") SET(SCALFMM_INCLUDES_DIR "@CMAKE_INSTALL_PREFIX@/include") SET(SCALFMM_INCLUDES_ADD "@SCALFMM_INCLUDES@") SET(SCALFMM_LIBRARIES_DIR "@CMAKE_INSTALL_PREFIX@/lib") -SET(SCALFMM_LIBRARIES_ADD "@SCALFMM_LIBRARIES@") +SET(SCALFMM_LIBRARIES_ADD "@SCALFMM_COMPILE_LIBS@") SET(SCALFMM_LIBRARY_NAME "@scalfmm_lib@") SET(ScaLFMM_CXX_FLAGS "@SCALFMM_CXX_FLAGS@") # diff --git a/Src/BalanceTree/FChebSymCostKernel.hpp b/Src/BalanceTree/FChebSymCostKernel.hpp index 58dab287cc56323f1a8734556fea09fe3e20759a..e106167c6faab15cb9055a7a8dde5902245246d0 100644 --- a/Src/BalanceTree/FChebSymCostKernel.hpp +++ b/Src/BalanceTree/FChebSymCostKernel.hpp @@ -436,7 +436,7 @@ private: // truncated singular value decomposition of matrix const unsigned int info = FBlas::gesvd(nnodes, nnodes, U, S, VT, nnodes, LWORK, WORK); - if (info!=0) throw std::runtime_error("SVD did not converge with " + info); + if (info!=0) throw std::runtime_error("SVD did not converge with " + std::to_string(info)); const unsigned int rank = this->getRank(S, Epsilon); // store diff --git a/Src/Core/FFmmAlgorithm.hpp b/Src/Core/FFmmAlgorithm.hpp index 4804b459dbdca726f5f8b0133e1acab2af4abba3..3a8fffc259d63a41508f197cc35a60a9599b38a5 100644 --- a/Src/Core/FFmmAlgorithm.hpp +++ b/Src/Core/FFmmAlgorithm.hpp @@ -1,5 +1,5 @@ // =================================================================================== -// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Bérenger Bramas, Matthias Messner +// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Berenger Bramas, Matthias Messner // olivier.coulaud@inria.fr, berenger.bramas@inria.fr // This software 
is a computer program whose purpose is to compute the FMM. // @@ -300,7 +300,7 @@ protected: } while(octreeIterator.moveRight()); - FLOG( FLog::Controller << "\tFinished (@Direct Pass (L2P + P2P) = " << counterTime.tacAndElapsed() << "s)\n" ); + FLOG( FLog::Controller << "\tFinished (@Direct Pass (L2P + P2P) = " << counterTime.tacAndElapsed() << " s)\n" ); FLOG( FLog::Controller << "\t\t Computation L2P : " << computationCounterL2P.cumulated() << " s\n" ); FLOG( FLog::Controller << "\t\t Computation P2P : " << computationCounterP2P.cumulated() << " s\n" ); diff --git a/Src/Core/FFmmAlgorithmThread.hpp b/Src/Core/FFmmAlgorithmThread.hpp index e6868111cb8af4b9aa8bd6d92b8eec494bd48251..f35e3aaa253b429d246c8544e759ead4e80d5284 100644 --- a/Src/Core/FFmmAlgorithmThread.hpp +++ b/Src/Core/FFmmAlgorithmThread.hpp @@ -60,28 +60,26 @@ class FFmmAlgorithmThread : public FAbstractAlgorithm, public FAlgorithmTimers{ const int OctreeHeight; ///< The height of the given tree. - const bool staticSchedule; + int userChunkSize; ///< Chunk policy: <= -1 is static, 0 is N/p^2, otherwise the chunk size itself (not const: setChunkSize() updates it). const int leafLevelSeperationCriteria; - unsigned int ompChunkSize = 0; - public: /** Class constructor * * The constructor needs the octree and the kernels used for computation. * \param inTree the octree to work on. * \param inKernels the kernels to call. - * \param inStaticSchedule Whether to use static or dynamic OpenMP scheduling. default is dynamic. + * \param inUserChunkSize To specify the chunk size in the loops (-1 is static, 0 is N/p^2, otherwise it + * is directly used as the number of items to process together), default is 10 * * \except An exception is thrown if one of the arguments is NULL. 
*/ FFmmAlgorithmThread(OctreeClass* const inTree, KernelClass* const inKernels, - const bool inStaticSchedule = false, const int inLeafLevelSeperationCriteria = 1) + const int inUserChunkSize = 10, const int inLeafLevelSeperationCriteria = 1) : tree(inTree) , kernels(nullptr), iterArray(nullptr), leafsNumber(0), MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()), - staticSchedule(inStaticSchedule), leafLevelSeperationCriteria(inLeafLevelSeperationCriteria) { - + userChunkSize(inUserChunkSize), leafLevelSeperationCriteria(inLeafLevelSeperationCriteria) { FAssertLF(tree, "tree cannot be null"); this->kernels = new KernelClass*[MaxThreads]; @@ -96,7 +94,7 @@ public: FAbstractAlgorithm::setNbLevelsInTree(tree->getHeight()); FLOG(FLog::Controller << "FFmmAlgorithmThread (Max Thread " << omp_get_max_threads() << ")\n"); - FLOG(FLog::Controller << "\t static schedule " << (staticSchedule?"TRUE":"FALSE") << ")\n"); + FLOG(FLog::Controller << "\t static schedule " << (userChunkSize <= -1?"static":(userChunkSize == 0?"N/p^2":"fixed")) << " (userChunkSize = " << userChunkSize << ")\n"); } /** Default destructor */ @@ -107,26 +105,20 @@ public: delete [] this->kernels; } - template <class NumType> NumType getChunkSize(const NumType inSize) const { - if(staticSchedule){ + if(userChunkSize <= -1){ return FMath::Max(NumType(1) , NumType(double(inSize)/double(omp_get_max_threads())) ); + } else if(userChunkSize == 0){ + return FMath::Max(NumType(1) , inSize/NumType(omp_get_max_threads()*omp_get_max_threads())); + } else { + return userChunkSize; } - else{ - if (ompChunkSize > 0) { - return ompChunkSize; - } - else { - return FMath::Max(NumType(1) , inSize/NumType(omp_get_max_threads()*omp_get_max_threads())); - } - } - } - + } template <class NumType> void setChunkSize(const NumType size) { - ompChunkSize = size; + userChunkSize = static_cast<int>(size); } protected: diff --git a/Src/GroupTree/OpenCl/FOpenCLDeviceWrapper.hpp b/Src/GroupTree/OpenCl/FOpenCLDeviceWrapper.hpp index 
d716d086f23c7e1b614cc89284098a48448189d3..ad274fac9fafa52bda66e2470858390a4159f3b1 100644 --- a/Src/GroupTree/OpenCl/FOpenCLDeviceWrapper.hpp +++ b/Src/GroupTree/OpenCl/FOpenCLDeviceWrapper.hpp @@ -30,7 +30,7 @@ protected: size_t v[9]; }; - static void SetKernelArgs(cl_kernel& kernel, const int pos){ + static void SetKernelArgs(cl_kernel& /*kernel*/, const int /*pos*/){ } template <class ParamClass, class... Args> static void SetKernelArgs(cl_kernel& kernel, const int pos, ParamClass* param, Args... args){ diff --git a/Tests/noDist/PerfTest.cpp b/Tests/noDist/PerfTest.cpp index c8963e910f07f27c48cca9aa456d6aabafc18f64..0023b21399ce45cace50d51159e71422d4056b1c 100644 --- a/Tests/noDist/PerfTest.cpp +++ b/Tests/noDist/PerfTest.cpp @@ -45,8 +45,8 @@ * See documentation of FTreeLoader, FKernelLoader, FAlgoLoader. */ template <class TreeLoader, - template <typename TL> class KernelLoader, - template <typename TL, template <typename TL> class KL> class AlgoLoader> + template <typename TL_1> class KernelLoader, + template <typename TL_2, template <typename TL_3> class KL> class AlgoLoader> void runperf(FPerfTestParams& params) { TreeLoader treeLoader(params);