diff --git a/CMakeLists.txt b/CMakeLists.txt index b29e59e05f108e1f153926085803f320dd6da729..acb986ac4b329748c60187fbeced9fc5888267e9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,7 +12,7 @@ set(CMAKE_DISABLE_IN_SOURCE_BUILD ON) # Project Declaration #=========================================================================== project(SCALFMM C CXX) -INCLUDE( CMakeDependentOption ) + # check if compiling into source directories string(COMPARE EQUAL "${CMAKE_SOURCE_DIR}" "${CMAKE_BINARY_DIR}" insource) if(insource) @@ -21,12 +21,12 @@ endif(insource) list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/CMakeModules/) set(SCALFMM_CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/CMakeModules) - -include(GetCpuInfos) -GetCpuInfos() - +# # Adds the CMAKE_DEPENDENT_OPTION command INCLUDE(CMakeDependentOption) +# Add to check CPU info +include(GetCpuInfos) +GetCpuInfos() #=========================================================================== # Version Number @@ -45,15 +45,14 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/") # Add extra cmake module path and initialize morse cmake modules # -------------------------------------------------------------- if(MORSE_DISTRIB_DIR) - list(APPEND CMAKE_MODULE_PATH ${MORSE_DISTRIB_DIR}/cmake_modules) - list(APPEND CMAKE_MODULE_PATH "${MORSE_DISTRIB_DIR}/cmake_modules/morse") - set(MORSE_CMAKE_MODULE_PATH ${MORSE_DISTRIB_DIR}/cmake_modules/morse ) + list(APPEND CMAKE_MODULE_PATH ${MORSE_DISTRIB_DIR}/cmake_modules) + list(APPEND CMAKE_MODULE_PATH "${MORSE_DISTRIB_DIR}/cmake_modules/morse") + set(MORSE_CMAKE_MODULE_PATH ${MORSE_DISTRIB_DIR}/cmake_modules/morse ) elseif(EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/") - list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/CMakeModules/morse/) - set(MORSE_CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/CMakeModules/morse ) + list(APPEND CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/CMakeModules/morse/) + set(MORSE_CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/CMakeModules/morse ) endif() 
include(MorseInit) - # # Options option( SCALFMM_USE_MPI "Set to ON to build ScaFMM with MPI" OFF ) @@ -70,11 +69,11 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/") option( SCALFMM_ATTACHE_SOURCE "Set to ON to compile with -g" OFF ) option( SCALFMM_USE_ADDONS "Set to ON to compile add ons" OFF ) if( APPLE ) # to fix problem with GCC and avx - CMAKE_DEPENDENT_OPTION( SCALFMM_USE_SSE "Set to ON to compile with SSE support (and use intrinsec SSE P2P)" ON "CPUOPTION_SSE3;NOT CPUOPTION_AVX2" OFF ) - CMAKE_DEPENDENT_OPTION( SCALFMM_USE_AVX "Set to ON to compile with AVX support (and use intrinsec AVX P2P)" OFF "CPUOPTION_AVX; NOT CPUOPTION_AVX2" OFF ) + CMAKE_DEPENDENT_OPTION( SCALFMM_USE_SSE "Set to ON to compile with SSE support (and use intrinsec SSE P2P)" ON "CPUOPTION_SSE3;NOT CPUOPTION_AVX2" OFF ) + CMAKE_DEPENDENT_OPTION( SCALFMM_USE_AVX "Set to ON to compile with AVX support (and use intrinsec AVX P2P)" OFF "CPUOPTION_AVX; NOT CPUOPTION_AVX2" OFF ) else(APPLE) - CMAKE_DEPENDENT_OPTION( SCALFMM_USE_SSE "Set to ON to compile with SSE support (and use intrinsec SSE P2P)" ON "CPUOPTION_SSE3;NOT CPUOPTION_AVX;NOT CPUOPTION_AVX2" OFF ) - CMAKE_DEPENDENT_OPTION( SCALFMM_USE_AVX "Set to ON to compile with AVX support (and use intrinsec AVX P2P)" ON "CPUOPTION_AVX; NOT CPUOPTION_AVX2" OFF ) + CMAKE_DEPENDENT_OPTION( SCALFMM_USE_SSE "Set to ON to compile with SSE support (and use intrinsec SSE P2P)" ON "CPUOPTION_SSE3;NOT CPUOPTION_AVX;NOT CPUOPTION_AVX2" OFF ) + CMAKE_DEPENDENT_OPTION( SCALFMM_USE_AVX "Set to ON to compile with AVX support (and use intrinsec AVX P2P)" ON "CPUOPTION_AVX; NOT CPUOPTION_AVX2" OFF ) endif(APPLE) CMAKE_DEPENDENT_OPTION( SCALFMM_USE_AVX2 "Set to ON to compile with AVX support (and use intrinsec AVXZ P2P)" ON "CPUOPTION_AVX2" OFF ) option( SCALFMM_USE_ASSERT "Set to ON to enable safe tests during execution" ON ) @@ -89,10 +88,11 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/") option( 
SCALFMM_DISABLE_NATIVE_OMP4 "Set to ON to disable the gcc/intel omp4" OFF ) option( SCALFMM_TIME_OMPTASKS "Set to ON to time omp4 tasks and generate output file" OFF ) endif() + message(STATUS "AVANT ${CMAKE_CXX_COMPILER_ID}" ) if( SCALFMM_USE_MPI ) try_compile(COMPILE_INTEL ${CMAKE_CURRENT_BINARY_DIR} - ${SCALFMM_CMAKE_MODULE_PATH}/compileTestIntel.cpp - COMPILE_DEFINITIONS "${CMAKE_CXX_FLAGS}") + ${SCALFMM_CMAKE_MODULE_PATH}/compileTestIntel.cpp + COMPILE_DEFINITIONS "${CMAKE_CXX_FLAGS}") if (COMPILE_INTEL) set(CMAKE_CXX_COMPILER_ID "Intel") endif() @@ -101,6 +101,7 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/") # Set scalfmm to default libraries set(SCALFMM_LIBRARIES "") set(SCALFMM_CXX_FLAGS "-std=c++11 -fpic -Wall") + MESSAGE(STATUS "FLAGS =${SCALFMM_CXX_FLAGS}") # # # Test if openmp is here @@ -127,7 +128,11 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/") if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") # INTEL IF (APPLE) - set(SSE_FLAGS "-msse4 -mfpmath=sse") # -mtune=native -march=native + IF( CPUOPTION_SSE42 ) + set(SSE_FLAGS "-msse4 -mfpmath=sse") # -mtune=native -march=native + ELSEIF (CPUOPTION_SSE3) + set(SSE_FLAGS "-msse3 -mfpmath=sse") # -mtune=native -march=native + ENDIF (CPUOPTION_SSE42) else(APPLE) set(AVX_FLAGS "-march=native -axCORE-AVX2,CORE-AVX-I,AVX") #-mavx set(AVX2_FLAGS "-march=native -axCORE-AVX2,CORE-AVX-I") #-march=core-avx2 @@ -150,8 +155,12 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/") endif() endif() IF (APPLE) - set(SSE_FLAGS "-msse4 -mfpmath=sse") # -mtune=native -march=native - set(SSE_FLAGS "-msse3 -mfpmath=sse") + # set(SSE_FLAGS "-msse4 -mfpmath=sse") # -mtune=native -march=native + IF( CPUOPTION_SSE42 ) + set(SSE_FLAGS "-msse4 -mfpmath=sse") # -mtune=native -march=native + ELSEIF (CPUOPTION_SSE3) + set(SSE_FLAGS "-msse3 -mfpmath=sse") # -mtune=native -march=native + ENDIF (CPUOPTION_SSE42) set(AVX_FLAGS "-mtune=native -march=avx") 
set(AVX2_FLAGS "-mtune=native -march=native -mmic") else(APPLE) @@ -212,12 +221,12 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/") "Set your optimization flags for release mode.") else(APPLE) # Not apple system - Check the compiler flags - if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") - #set(SCALFMM_FLAGS_OPTI_RELEASE "-fp-model precise -fp-model source -fimf-precision=low -funroll-loops -ftree-vectorize" - set(SCALFMM_FLAGS_OPTI_RELEASE "-funroll-loops -ftree-vectorize" - CACHE STRING "Set your optimization flags for release mode.") - # set(SCALFMM_FLAGS_OPTI_RELEASE "-funroll-loops -ftree-vectorize" CACHE STRING - # "Set your optimization flags for release mode.") + if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") + #set(SCALFMM_FLAGS_OPTI_RELEASE "-fp-model precise -fp-model source -fimf-precision=low -funroll-loops -ftree-vectorize" + set(SCALFMM_FLAGS_OPTI_RELEASE "-funroll-loops -ftree-vectorize" + CACHE STRING "Set your optimization flags for release mode.") + # set(SCALFMM_FLAGS_OPTI_RELEASE "-funroll-loops -ftree-vectorize" CACHE STRING + # "Set your optimization flags for release mode.") else() set(SCALFMM_FLAGS_OPTI_RELEASE "-ffast-math -funroll-loops -ftree-vectorize" CACHE STRING "Set your optimization flags for release mode.") @@ -226,8 +235,8 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/") # set(SCALFMM_CXX_FLAGS "${SCALFMM_CXX_FLAGS} ${SCALFMM_FLAGS_OPTI_RELEASE}") endif() - MESSAGE(STATUS " %%%%%%%%%% SCALFMM_CXX_FLAGS ${SCALFMM_CXX_FLAGS} %%%%%%%%%%%%%") - MESSAGE(STATUS " %%%%%%%%%% CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE} %%%%%%%%%%%%%") + MESSAGE(STATUS " %%%%%%%%%% SCALFMM_CXX_FLAGS ${SCALFMM_CXX_FLAGS} %%%%%%%%%%%%%") + MESSAGE(STATUS " %%%%%%%%%% CMAKE_CXX_FLAGS_RELEASE ${CMAKE_CXX_FLAGS_RELEASE} %%%%%%%%%%%%%") # ############################################################################## @@ -249,28 +258,28 @@ if (MORSE_DISTRIB_DIR OR EXISTS 
"${CMAKE_SOURCE_DIR}/CMakeModules/morse/") # -DMPI_C_COMPILER=path/to/mpicc -DMPI_CXX_COMPILER=path/to/mpicxx # at cmake configure if(NOT MPI_C_COMPILER) - set(MPI_C_COMPILER mpicc) + set(MPI_C_COMPILER mpicc) endif() if(NOT MPI_CXX_COMPILER) - set(MPI_CXX_COMPILER mpicxx) + set(MPI_CXX_COMPILER mpicxx) endif() find_package(MPI REQUIRED) if (MPI_CXX_INCLUDE_PATH) - include_directories( ${MPI_CXX_INCLUDE_PATH} ) + include_directories( ${MPI_CXX_INCLUDE_PATH} ) endif() if (MPI_CXX_COMPILE_FLAGS) - set(SCALFMM_CXX_FLAGS "${SCALFMM_CXX_FLAGS} ${MPI_CXX_COMPILE_FLAGS}") + set(SCALFMM_CXX_FLAGS "${SCALFMM_CXX_FLAGS} ${MPI_CXX_COMPILE_FLAGS}") endif() if (MPI_CXX_INCLUDE_PATH) - set(SCALFMM_INCLUDES "${SCALFMM_INCLUDES}; ${MPI_CXX_INCLUDE_PATH}") + set(SCALFMM_INCLUDES "${SCALFMM_INCLUDES}; ${MPI_CXX_INCLUDE_PATH}") endif() if (MPI_CXX_LINK_FLAGS) - set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${MPI_CXX_LINK_FLAGS}") + set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${MPI_CXX_LINK_FLAGS}") endif() if (MPI_CXX_LIBRARIES) - set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${MPI_CXX_LIBRARIES}") + set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${MPI_CXX_LIBRARIES}") endif() endif() @@ -286,7 +295,7 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/") if( SCALFMM_USE_MKL_AS_BLAS ) set(BLA_VENDOR "Intel10_64lp_seq") - find_package(BLASEXT) # not REQUIRED + find_package(BLASEXT QUIET) # not REQUIRED if(BLAS_LIBRARY_DIRS) # the RPATH to be used when installing @@ -311,12 +320,12 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/") endif() if(BLAS_FOUND) - set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${BLASLAPACK_LIBRARIES}") - #message(STATUS "SCALFMM_LIBRARIES = ${SCALFMM_LIBRARIES}") + set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${BLASLAPACK_LIBRARIES}") + #message(STATUS "SCALFMM_LIBRARIES = ${SCALFMM_LIBRARIES}") else() - message(WARNING "BLAS has not been found, SCALFMM will continue to compile but some applications will be disabled.") - 
message(WARNING "If you have BLAS set BLAS_LIBDIR, BLAS_INCDIR or BLAS_DIR (CMake variables using -D or environment variables).") - set(SCALFMM_USE_BLAS OFF) + message(WARNING "BLAS has not been found, SCALFMM will continue to compile but some applications will be disabled.") + message(WARNING "If you have BLAS set BLAS_LIBDIR, BLAS_INCDIR or BLAS_DIR (CMake variables using -D or environment variables).") + set(SCALFMM_USE_BLAS OFF) endif() endif(SCALFMM_USE_BLAS) @@ -338,7 +347,7 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/") message(STATUS " SCALFMM USE MKL already defined") set(FFT_INCLUDES "$ENV{MKLROOT}/include/fftw" CACHE STRING "Set your MKL flags") if (BLAS_FOUND) - set(FFTW_FOUND ON) + set(FFTW_FOUND ON) endif() else(SCALFMM_USE_MKL_AS_BLAS) @@ -348,16 +357,16 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/") # Default is DOUBLE and without THREADS|OMP find_package(FFTW COMPONENTS MKL) # not REQUIRED if (FFTW_LIBRARY_DIRS_DEP) - set(FFT_LIBRARIES "-L${FFTW_LIBRARY_DIRS_DEP};" CACHE STRING "Set your MKL flags") + set(FFT_LIBRARIES "-L${FFTW_LIBRARY_DIRS_DEP};" CACHE STRING "Set your MKL flags") endif() if (FFTW_LIBRARIES_DEP) - foreach (fft_lib ${FFTW_LIBRARIES_DEP}) - set(FFT_LIBRARIES "${FFT_LIBRARIES};${fft_lib};") - endforeach() + foreach (fft_lib ${FFTW_LIBRARIES_DEP}) + set(FFT_LIBRARIES "${FFT_LIBRARIES};${fft_lib};") + endforeach() endif() set(FFT_INCLUDES "${FFTW_INCLUDE_DIRS_DEP}" ) if (FFT_LIBRARIES) - set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${FFT_LIBRARIES}") + set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${FFT_LIBRARIES}") endif() endif(SCALFMM_USE_MKL_AS_BLAS) @@ -385,21 +394,21 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/") endif(SCALFMM_USE_MKL_AS_FFTW) if (FFT_INCLUDES) - set(SCALFMM_INCLUDES "${SCALFMM_INCLUDES}; ${FFT_INCLUDES}") + set(SCALFMM_INCLUDES "${SCALFMM_INCLUDES}; ${FFT_INCLUDES}") endif() if(FFTW_FOUND) - message(STATUS " 
SCALFMM_LIBRARIES = ${SCALFMM_LIBRARIES}") - message(STATUS " SCALFMM_INCLUDES = ${SCALFMM_INCLUDES}") + message(STATUS " SCALFMM_LIBRARIES = ${SCALFMM_LIBRARIES}") + message(STATUS " SCALFMM_INCLUDES = ${SCALFMM_INCLUDES}") else() - message(WARNING "FFTW has not been found, SCALFMM will continue to compile but some applications will be disabled.") - message(WARNING "If you have FFTW set FFTW_LIBDIR, FFTW_INCDIR or FFTW_DIR (CMake variables using -D or environment variables).") - set(SCALFMM_USE_FFT OFF) + message(WARNING "FFTW has not been found, SCALFMM will continue to compile but some applications will be disabled.") + message(WARNING "If you have FFTW set FFTW_LIBDIR, FFTW_INCDIR or FFTW_DIR (CMake variables using -D or environment variables).") + set(SCALFMM_USE_FFT OFF) endif() endif(SCALFMM_USE_FFT) list(APPEND FUSE_LIST "FFT") - message(STATUS " SCALFMM_LIBRARIES = ${SCALFMM_LIBRARIES}") - message(STATUS " SCALFMM_INCLUDES = ${SCALFMM_INCLUDES}") + message(STATUS " SCALFMM_LIBRARIES = ${SCALFMM_LIBRARIES}") + message(STATUS " SCALFMM_INCLUDES = ${SCALFMM_INCLUDES}") message(STATUS "SCALFMM_USE_FFT = ${SCALFMM_USE_FFT}") @@ -423,38 +432,38 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/") if( SCALFMM_USE_STARPU ) # No fast math with starpu if(SCALFMM_CXX_FLAGS) - string(REPLACE "-ffast-math" " " SCALFMM_CXX_FLAGS ${SCALFMM_CXX_FLAGS}) + string(REPLACE "-ffast-math" " " SCALFMM_CXX_FLAGS ${SCALFMM_CXX_FLAGS}) endif() if(SCALFMM_FLAGS_OPTI_RELEASE) - string(REPLACE "-ffast-math" " " SCALFMM_FLAGS_OPTI_RELEASE ${SCALFMM_FLAGS_OPTI_RELEASE}) + string(REPLACE "-ffast-math" " " SCALFMM_FLAGS_OPTI_RELEASE ${SCALFMM_FLAGS_OPTI_RELEASE}) endif() # CUDA could be used with StarPU enabled option( SCALFMM_USE_CUDA "Set to ON to use CUDA with StarPU" OFF ) message( STATUS "SCALFMM_USE_CUDA = ${SCALFMM_USE_CUDA}" ) if(SCALFMM_USE_CUDA) - execute_process(COMMAND nvcc --version ERROR_VARIABLE cuda_error_output OUTPUT_QUIET) - 
if(cuda_error_output) - message( FATAL_ERROR "nvcc is needed with CUDA." ) - endif() - if(NOT DEFINED CUSTOM_CUDA_FLAGS) - set( CUSTOM_CUDA_FLAGS "-std=c++11;-arch=sm_20" CACHE - STRING "Set your CUDA flags, for example : -arch=sm_20;-ptxas-options=-v;-use_fast_math") - endif() - # This is needed to remove backslash after space in ADD_CUSTOM_COMMAND - separate_arguments(CUSTOM_CUDA_FLAGS) - message( STATUS "CUSTOM_CUDA_FLAGS = ${CUSTOM_CUDA_FLAGS}" ) + execute_process(COMMAND nvcc --version ERROR_VARIABLE cuda_error_output OUTPUT_QUIET) + if(cuda_error_output) + message( FATAL_ERROR "nvcc is needed with CUDA." ) + endif() + if(NOT DEFINED CUSTOM_CUDA_FLAGS) + set( CUSTOM_CUDA_FLAGS "-std=c++11;-arch=sm_20" CACHE + STRING "Set your CUDA flags, for example : -arch=sm_20;-ptxas-options=-v;-use_fast_math") + endif() + # This is needed to remove backslash after space in ADD_CUSTOM_COMMAND + separate_arguments(CUSTOM_CUDA_FLAGS) + message( STATUS "CUSTOM_CUDA_FLAGS = ${CUSTOM_CUDA_FLAGS}" ) - find_package(CUDA REQUIRED) + find_package(CUDA REQUIRED) - if (CUDA_INCLUDE_DIRS) - include_directories(${CUDA_INCLUDE_DIRS}) - endif() - if (CUDA_LIBRARIES) - set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${CUDA_LIBRARIES}") - endif() - - set(CUDA_NEEDED_INCLUDE_DIRS ${CMAKE_BINARY_DIR}/Src) + if (CUDA_INCLUDE_DIRS) + include_directories(${CUDA_INCLUDE_DIRS}) + endif() + if (CUDA_LIBRARIES) + set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${CUDA_LIBRARIES}") + endif() + + set(CUDA_NEEDED_INCLUDE_DIRS ${CMAKE_BINARY_DIR}/Src) endif() # Find StarPU with a list of optional components @@ -462,50 +471,50 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/") # create list of components in order to make a single call to find_package(starpu...) 
set(STARPU_COMPONENT_LIST "HWLOC") if(SCALFMM_USE_MPI) - list(APPEND STARPU_COMPONENT_LIST "MPI") + list(APPEND STARPU_COMPONENT_LIST "MPI") endif() if(SCALFMM_USE_CUDA) - list(APPEND STARPU_COMPONENT_LIST "CUDA") + list(APPEND STARPU_COMPONENT_LIST "CUDA") endif() find_package(STARPU ${SCALFMM_STARPU_VERSION} REQUIRED - COMPONENTS ${STARPU_COMPONENT_LIST}) + COMPONENTS ${STARPU_COMPONENT_LIST}) # Append list of libraries and include dirs include_directories(${STARPU_INCLUDE_DIRS_DEP}) foreach (starpu_libdir ${STARPU_LIBRARY_DIRS_DEP}) if (${starpu_libdir} MATCHES "^ *-L") - set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${starpu_libdir}") + set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${starpu_libdir}") else() - set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};-L${starpu_libdir}") + set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};-L${starpu_libdir}") endif() endforeach() foreach (starpu_lib ${STARPU_LIBRARIES_DEP}) if (EXISTS ${starpu_lib} OR ${starpu_lib} MATCHES "^ *-") - set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${starpu_lib}") + set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${starpu_lib}") else() - set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};-l${starpu_lib}") + set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};-l${starpu_lib}") endif() endforeach() # TODO: is this very useful? 
CUDA is already a component of find starpu if (CUDA_LIBRARIES) - set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${CUDA_LIBRARIES}") + set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${CUDA_LIBRARIES}") endif() # Message message(STATUS " STARPU_LIBRARIES = ${STARPU_LIBRARIES}") if (STARPU_INCLUDE_DIRS) - message(STATUS " STARPU_INCLUDES = ${STARPU_INCLUDE_DIRS}") - set(SCALFMM_INCLUDES "${SCALFMM_INCLUDES}; ${STARPU_INCLUDE_DIRS}") + message(STATUS " STARPU_INCLUDES = ${STARPU_INCLUDE_DIRS}") + set(SCALFMM_INCLUDES "${SCALFMM_INCLUDES}; ${STARPU_INCLUDE_DIRS}") endif() # TODO: replace this by a component of find starpu OPTION( SCALFMM_USE_OPENCL "Set to ON to use OPENCL with StarPU" OFF ) MESSAGE( STATUS "SCALFMM_USE_OPENCL = ${SCALFMM_USE_OPENCL}" ) if(SCALFMM_USE_OPENCL) - include_directories($ENV{OPENCL_INC}) - SET(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};-L$ENV{OPENCL_LIB};-lOpenCL") + include_directories($ENV{OPENCL_INC}) + SET(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};-L$ENV{OPENCL_LIB};-lOpenCL") endif() endif(SCALFMM_USE_STARPU) list(APPEND FUSE_LIST "STARPU") @@ -523,16 +532,16 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/") endif() message( STATUS "SSE_FLAGS ${SSE_FLAGS} -- ${CMAKE_CXX_FLAGS} ") try_compile(COMPILE_SSE ${CMAKE_CURRENT_BINARY_DIR} - ${SCALFMM_CMAKE_MODULE_PATH}/compileTestSse.cpp - COMPILE_DEFINITIONS "${CMAKE_CXX_FLAGS} ${SSE_FLAGS}" - OUTPUT_VARIABLE COMPILE_SSE_OUTPUT) + ${SCALFMM_CMAKE_MODULE_PATH}/compileTestSse.cpp + COMPILE_DEFINITIONS "${CMAKE_CXX_FLAGS} ${SSE_FLAGS}" + OUTPUT_VARIABLE COMPILE_SSE_OUTPUT) if(${COMPILE_SSE}) set(SCALFMM_CXX_FLAGS "${SCALFMM_CXX_FLAGS} ${SSE_FLAGS}") try_compile(COMPILE_RESULT_VAR ${CMAKE_CURRENT_BINARY_DIR} - ${SCALFMM_CMAKE_MODULE_PATH}/checkSSEpe.cpp - COMPILE_DEFINITIONS "${CMAKE_CXX_FLAGS} ${SSE_FLAGS}") + ${SCALFMM_CMAKE_MODULE_PATH}/checkSSEpe.cpp + COMPILE_DEFINITIONS "${CMAKE_CXX_FLAGS} ${SSE_FLAGS}") if( NOT ${COMPILE_RESULT_VAR}) set(__SSEPE_INTEL_COMPILER ON) endif() @@ 
-554,23 +563,23 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/") endif() try_compile(COMPILE_AVX ${CMAKE_CURRENT_BINARY_DIR} - ${SCALFMM_CMAKE_MODULE_PATH}/compileTestAvx.cpp - COMPILE_DEFINITIONS "${CMAKE_CXX_FLAGS} ${AVX_FLAGS}" - OUTPUT_VARIABLE COMPILE_AVX_OUTPUT) - if(${COMPILE_AVX}) - message(STATUS "%%%%%%%%%%%% COMPILE_AVX = ${COMPILE_AVX} %%%%< ${AVX_FLAGS}") - + ${SCALFMM_CMAKE_MODULE_PATH}/compileTestAvx.cpp + COMPILE_DEFINITIONS "${CMAKE_CXX_FLAGS} ${AVX_FLAGS}" + OUTPUT_VARIABLE COMPILE_AVX_OUTPUT) + if(${COMPILE_AVX}) + message(STATUS "%%%%%%%%%%%% COMPILE_AVX = ${COMPILE_AVX} %%%%< ${AVX_FLAGS}") + set(SCALFMM_CXX_FLAGS "${SCALFMM_CXX_FLAGS} ${AVX_FLAGS}") - message(STATUS "%%%%%%%%%%%% SCALFMM_CXX_FLAGS = ${SCALFMM_CXX_FLAGS}") - #set( SCALFMM_USE_SSE OFF FORCE) # ne marche pas + message(STATUS "%%%%%%%%%%%% SCALFMM_CXX_FLAGS = ${SCALFMM_CXX_FLAGS}") + #set( SCALFMM_USE_SSE OFF FORCE) # ne marche pas try_compile(COMPILE_RESULT_AVSPE ${CMAKE_CURRENT_BINARY_DIR} - ${SCALFMM_CMAKE_MODULE_PATH}/checkAVXpe.cpp - COMPILE_DEFINITIONS "${CMAKE_CXX_FLAGS} ${AVX_FLAGS}") - if( NOT ${COMPILE_RESULT_AVSPE}) + ${SCALFMM_CMAKE_MODULE_PATH}/checkAVXpe.cpp + COMPILE_DEFINITIONS "${CMAKE_CXX_FLAGS} ${AVX_FLAGS}") + if( NOT ${COMPILE_RESULT_AVSPE}) set(__AVXPE_INTEL_COMPILER ON) - endif() + endif() message(STATUS ${CMAKE_CXX_FLAGS} ) else(${COMPILE_AVX}) @@ -595,15 +604,15 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/") endif() try_compile(COMPILE_AVX2 ${CMAKE_CURRENT_BINARY_DIR} - ${SCALFMM_CMAKE_MODULE_PATH}/compileTestAvx2.cpp - COMPILE_DEFINITIONS "${CMAKE_CXX_FLAGS} ${AVX2_FLAGS}" - OUTPUT_VARIABLE COMPILE_AVX2_OUTPUT) + ${SCALFMM_CMAKE_MODULE_PATH}/compileTestAvx2.cpp + COMPILE_DEFINITIONS "${CMAKE_CXX_FLAGS} ${AVX2_FLAGS}" + OUTPUT_VARIABLE COMPILE_AVX2_OUTPUT) if(${COMPILE_AVX2}) set(SCALFMM_CXX_FLAGS "${SCALFMM_CXX_FLAGS} ${AVX2_FLAGS}") #set( SCALFMM_USE_SSE OFF FORCE) # ne marche pas 
try_compile(COMPILE_RESULT_AVSPE ${CMAKE_CURRENT_BINARY_DIR} - ${SCALFMM_CMAKE_MODULE_PATH}/checkAVX2pe.cpp - COMPILE_DEFINITIONS "${CMAKE_CXX_FLAGS} ${AVX2_FLAGS}") + ${SCALFMM_CMAKE_MODULE_PATH}/checkAVX2pe.cpp + COMPILE_DEFINITIONS "${CMAKE_CXX_FLAGS} ${AVX2_FLAGS}") if( NOT ${COMPILE_RESULT_AVSPE}) set(__AVX2PE_INTEL_COMPILER ON) endif() @@ -643,15 +652,27 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/") find_package (PkgConfig) if(PKG_CONFIG_FOUND) set(PKG_CONFIG_USE_CMAKE_PREFIX_PATH "ON") - pkg_search_module( EZTrace REQUIRED eztrace) - link_directories(${EZTrace_LIBRARY_DIRS}) - link_libraries( ${EZTrace_LIBRARIES} -leztrace-memory) - include_directories(${EZTrace_INCLUDE_DIRS}) - MESSAGE(STATUS "EZTRACE: ${EZTrace_INCLUDE_DIRS} ${EZTrace_LIBRARY_DIRS} ${EZTrace_LIBRARIES}") - CMAKE_DEPENDENT_OPTION(SCALFMM_TRACE_M2L "Set to ON to trace M2L operator" ON "SCALFMM_USE_EZTRACE" OFF ) - + pkg_search_module( EZTrace REQUIRED eztrace) + if(EZTrace_FOUND) + link_directories(${EZTrace_LIBRARY_DIRS}) + link_libraries( ${EZTrace_LIBRARIES}) + IF( SCALFMM_USE_MPI ) + link_libraries(-leztrace-mpi) + ENDIF(SCALFMM_USE_MPI) + include_directories(${EZTrace_INCLUDE_DIRS}) + MESSAGE(STATUS "EZTRACE: ${EZTrace_INCLUDE_DIRS} ${EZTrace_LIBRARY_DIRS} ${EZTrace_LIBRARIES}") + CMAKE_DEPENDENT_OPTION(SCALFMM_TRACE_ALGO "Set to ON to trace the full algorithm (all operators)" ON "SCALFMM_USE_EZTRACE" OFF ) + CMAKE_DEPENDENT_OPTION(SCALFMM_TRACE_P2M "Set to ON to trace P2M operator" OFF "SCALFMM_USE_EZTRACE" OFF ) + CMAKE_DEPENDENT_OPTION(SCALFMM_TRACE_M2M "Set to ON to trace M2M operator" OFF "SCALFMM_USE_EZTRACE" OFF ) + CMAKE_DEPENDENT_OPTION(SCALFMM_TRACE_M2L "Set to ON to trace M2L operator" OFF "SCALFMM_USE_EZTRACE" OFF ) + CMAKE_DEPENDENT_OPTION(SCALFMM_TRACE_L2L "Set to ON to trace L2L operator" OFF "SCALFMM_USE_EZTRACE" OFF ) + CMAKE_DEPENDENT_OPTION(SCALFMM_TRACE_P2P "Set to ON to trace P2P operator" OFF "SCALFMM_USE_EZTRACE" OFF ) + 
else(EZTrace_FOUND) + MESSAGE(WARNING "Eztrace not found - EZTRACE Is set to OFF") + set(SCALFMM_USE_EZTRACE OFF) + endif(EZTrace_FOUND) else(PKG_CONFIG_FOUND) - MESSAGE(WARNING "PKG-CONFIG not found- EZTRACE Is set to NONE") + MESSAGE(WARNING "PKG-CONFIG not found - EZTRACE Is set to OFF") set(SCALFMM_USE_EZTRACE OFF) endif(PKG_CONFIG_FOUND) @@ -669,11 +690,11 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/") # We need the libraries without spaces (inside the config file) set(SCALFMM_COMPILE_LIBS "") foreach(lib_var ${SCALFMM_LIBRARIES}) - string(STRIP ${lib_var} lib_var) - LIST(APPEND SCALFMM_COMPILE_LIBS ${lib_var}) + string(STRIP ${lib_var} lib_var) + LIST(APPEND SCALFMM_COMPILE_LIBS ${lib_var}) endforeach() configure_file( ${CMAKE_CURRENT_SOURCE_DIR}/Src/ScalFmmConfig.h.cmake - ${CMAKE_BINARY_DIR}/Src/ScalFmmConfig.h ) + ${CMAKE_BINARY_DIR}/Src/ScalFmmConfig.h ) # ################################################################## # Build - lib # @@ -817,7 +838,7 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/") message(STATUS "SCALFMM_USE_BLAS = ${SCALFMM_USE_BLAS}") message(STATUS "SCALFMM_USE_FFT = ${SCALFMM_USE_FFT}") message(STATUS "SCALFMM_USE_MKL = ${SCALFMM_USE_MKL}") -# + # message(STATUS "CMAKE_CXX_FLAGS = ${CMAKE_CXX_FLAGS}") message(STATUS "SCALFMM_CXX_FLAGS = ${SCALFMM_CXX_FLAGS}") message(STATUS "SCALFMM_LIBRARIES = ${SCALFMM_LIBRARIES}") diff --git a/CMakeModules/morse/Ressources.cmake b/CMakeModules/morse/Ressources.cmake index 3a5cf6cdf0fb7ba0418925acb2252dccc0a8224d..15b7a35d11cd2cd2ddb5ff358bb3139bbe92cfa0 100644 --- a/CMakeModules/morse/Ressources.cmake +++ b/CMakeModules/morse/Ressources.cmake @@ -43,9 +43,9 @@ if(NOT DEFINED PROCESSOR_COUNT) if(APPLE) find_program(cmd_sys_pro "system_profiler") if(cmd_sys_pro) - execute_process(COMMAND ${cmd_sys_pro} OUTPUT_VARIABLE info) - string(REGEX REPLACE "^.*Total Number Of Cores: ([0-9]+).*$" "\\1" - NUMBER_OF_CPU "${info}") + 
execute_process(COMMAND ${cmd_sys_pro} SPHardwareDataType OUTPUT_VARIABLE info) + string(REGEX REPLACE "^.*Total Number of Cores: ([0-9]+).*$" "\\1" + NUMBER_OF_CPU "${info}") endif() endif() diff --git a/CMakeModules/morse/find/FindPTSCOTCH.cmake b/CMakeModules/morse/find/FindPTSCOTCH.cmake index 88c22713ca2a07d36b301e5f9f5b90f678426e75..e457dcac9a49f9f226d0e5f4f2531768ac9ddfc4 100644 --- a/CMakeModules/morse/find/FindPTSCOTCH.cmake +++ b/CMakeModules/morse/find/FindPTSCOTCH.cmake @@ -264,10 +264,21 @@ if(PTSCOTCH_LIBRARIES) if(CMAKE_THREAD_LIBS_INIT) list(APPEND REQUIRED_LIBS "${CMAKE_THREAD_LIBS_INIT}") endif() - if(UNIX OR WIN32) + set(Z_LIBRARY "Z_LIBRARY-NOTFOUND") + find_library(Z_LIBRARY NAMES z) + if(Z_LIBRARY) + list(APPEND REQUIRED_LIBS "-lz") + endif() + set(M_LIBRARY "M_LIBRARY-NOTFOUND") + find_library(M_LIBRARY NAMES m) + if(M_LIBRARY) list(APPEND REQUIRED_LIBS "-lm") endif() - list(APPEND REQUIRED_LIBS "-lz -lrt") + set(RT_LIBRARY "RT_LIBRARY-NOTFOUND") + find_library(RT_LIBRARY NAMES rt) + if(RT_LIBRARY) + list(APPEND REQUIRED_LIBS "-lrt") + endif() # set required libraries for link set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}") diff --git a/CMakeModules/morse/find/FindSCOTCH.cmake b/CMakeModules/morse/find/FindSCOTCH.cmake index 1f4dc25afe832307068f54453e7f05b566ba2919..c24242ce6fbd44465962f1521a0068fb50ff3a87 100644 --- a/CMakeModules/morse/find/FindSCOTCH.cmake +++ b/CMakeModules/morse/find/FindSCOTCH.cmake @@ -233,10 +233,21 @@ if(SCOTCH_LIBRARIES) if(CMAKE_THREAD_LIBS_INIT) list(APPEND REQUIRED_LIBS "${CMAKE_THREAD_LIBS_INIT}") endif() - if(UNIX OR WIN32) + set(Z_LIBRARY "Z_LIBRARY-NOTFOUND") + find_library(Z_LIBRARY NAMES z) + if(Z_LIBRARY) + list(APPEND REQUIRED_LIBS "-lz") + endif() + set(M_LIBRARY "M_LIBRARY-NOTFOUND") + find_library(M_LIBRARY NAMES m) + if(M_LIBRARY) list(APPEND REQUIRED_LIBS "-lm") endif() - list(APPEND REQUIRED_LIBS "-lz -lrt") + set(RT_LIBRARY "RT_LIBRARY-NOTFOUND") + find_library(RT_LIBRARY NAMES rt) + 
if(RT_LIBRARY) + list(APPEND REQUIRED_LIBS "-lrt") + endif() # set required libraries for link set(CMAKE_REQUIRED_INCLUDES "${REQUIRED_INCDIRS}") diff --git a/Src/Core/FFmmAlgorithm.hpp b/Src/Core/FFmmAlgorithm.hpp index 3a8fffc259d63a41508f197cc35a60a9599b38a5..49010c41abdbad8185ac66bcc86c7ac80cfd5314 100644 --- a/Src/Core/FFmmAlgorithm.hpp +++ b/Src/Core/FFmmAlgorithm.hpp @@ -62,6 +62,7 @@ public: FAssertLF(tree, "tree cannot be null"); FAssertLF(kernels, "kernels cannot be null"); + FAssertLF(leafLevelSeparationCriteria < 3, "Separation criteria should be < 3"); FAbstractAlgorithm::setNbLevelsInTree(tree->getHeight()); diff --git a/Src/Core/FFmmAlgorithmPeriodic.hpp b/Src/Core/FFmmAlgorithmPeriodic.hpp index bdd5f8cca3a34931a991c33d788369fbcb957947..d8107679a4ecde0b4e8c01e838b7c07aedf089e0 100644 --- a/Src/Core/FFmmAlgorithmPeriodic.hpp +++ b/Src/Core/FFmmAlgorithmPeriodic.hpp @@ -67,6 +67,7 @@ public: FAssertLF(tree, "tree cannot be null"); FAssertLF(-1 <= inUpperLevel, "inUpperLevel cannot be < -1"); + FAssertLF(leafLevelSeperationCriteria < 3, "Separation criteria should be < 3"); FAbstractAlgorithm::setNbLevelsInTree(extendedTreeHeight()); diff --git a/Src/Core/FFmmAlgorithmSectionTask.hpp b/Src/Core/FFmmAlgorithmSectionTask.hpp index ebafb2e9c9fffbe1a12282bc4eb192ffaa04daaf..658b8889a7404dc1c2beb0c0f29761571e4d171a 100644 --- a/Src/Core/FFmmAlgorithmSectionTask.hpp +++ b/Src/Core/FFmmAlgorithmSectionTask.hpp @@ -27,6 +27,7 @@ #include "../Containers/FVector.hpp" #include "FCoreCommon.hpp" +#include "FP2PExclusion.hpp" /** * @author Berenger Bramas (berenger.bramas@inria.fr) @@ -45,7 +46,7 @@ * * Upon destruction, this class does not deallocate pointers given to its constructor. 
*/ -template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass> +template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass, class P2PExclusionClass = FP2PMiddleExclusion> class FFmmAlgorithmSectionTask : public FAbstractAlgorithm, public FAlgorithmTimers { OctreeClass* const tree; ///< The octree to work on @@ -74,13 +75,14 @@ public: FAssertLF(tree, "tree cannot be null"); FAssertLF(inKernels, "kernels cannot be null"); + FAssertLF(leafLevelSeparationCriteria < 3, "Separation criteria should be < 3"); this->kernels = new KernelClass*[MaxThreads]; - #pragma omp parallel for schedule(static) - for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){ + #pragma omp parallel num_threads(MaxThreads) + { #pragma omp critical (InitFFmmAlgorithmSectionTask) { - this->kernels[idxThread] = new KernelClass(*inKernels); + this->kernels[omp_get_thread_num()] = new KernelClass(*inKernels); } } @@ -327,7 +329,7 @@ protected: // There is a maximum of 26 neighbors ContainerClass* neighbors[27]; - const int SizeShape = 3*3*3; + const int SizeShape = P2PExclusionClass::SizeShape; FVector<typename OctreeClass::Iterator> shapes[SizeShape]; typename OctreeClass::Iterator octreeIterator(tree); @@ -337,7 +339,7 @@ protected: // Coloring all the cells do{ const FTreeCoordinate& coord = octreeIterator.getCurrentGlobalCoordinate(); - const int shapePosition = (coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3); + const int shapePosition = P2PExclusionClass::GetShapeIdx(coord); shapes[shapePosition].push(octreeIterator); diff --git a/Src/Core/FFmmAlgorithmTask.hpp b/Src/Core/FFmmAlgorithmTask.hpp index 8d87deaf43ced395b8f61475df0fa23ec4d9ef36..2856035ac93f0e2fd823298d880f6ea625012bf9 100644 --- a/Src/Core/FFmmAlgorithmTask.hpp +++ b/Src/Core/FFmmAlgorithmTask.hpp @@ -27,6 +27,7 @@ #include "../Containers/FVector.hpp" #include "FCoreCommon.hpp" +#include "FP2PExclusion.hpp" /** * @author 
Berenger Bramas (berenger.bramas@inria.fr) @@ -39,7 +40,7 @@ * * Of course this class does not deallocate pointer given in arguements. */ -template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass> +template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass, class P2PExclusionClass = FP2PMiddleExclusion> class FFmmAlgorithmTask : public FAbstractAlgorithm, public FAlgorithmTimers { OctreeClass* const tree; //< The octree to work on @@ -49,7 +50,7 @@ class FFmmAlgorithmTask : public FAbstractAlgorithm, public FAlgorithmTimers { const int OctreeHeight; - const int leafLevelSeperationCriteria; + const int leafLevelSeparationCriteria; public: /** The constructor need the octree and the kernels used for computation * @param inTree the octree to work on @@ -58,20 +59,21 @@ public: */ FFmmAlgorithmTask(OctreeClass* const inTree, KernelClass* const inKernels, const int inLeafLevelSeperationCriteria = 1) : tree(inTree) , kernels(nullptr), - MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()), leafLevelSeperationCriteria(inLeafLevelSeperationCriteria) + MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()), leafLevelSeparationCriteria(inLeafLevelSeperationCriteria) { FAssertLF(tree, "tree cannot be null"); FAssertLF(inKernels, "kernels cannot be null"); + FAssertLF(leafLevelSeparationCriteria < 3, "Separation criteria should be < 3"); this->kernels = new KernelClass*[MaxThreads]; -#pragma omp parallel for schedule(static) - for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){ -#pragma omp critical (InitFFmmAlgorithmTask) - { - this->kernels[idxThread] = new KernelClass(*inKernels); - } - } + #pragma omp parallel num_threads(MaxThreads) + { + #pragma omp critical (InitFFmmAlgorithmTask) + { + this->kernels[omp_get_thread_num()] = new KernelClass(*inKernels); + } + } FAbstractAlgorithm::setNbLevelsInTree(tree->getHeight()); @@ -239,7 +241,7 @@ protected: 
// for each levels for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){ FLOG(FTic counterTimeLevel); - const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria); + const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeparationCriteria); // for each cell we apply the M2L with all cells in the implicit interaction list do{ #pragma omp task firstprivate(octreeIterator) private(neighbors) shared(idxLevel) @@ -286,7 +288,7 @@ protected: // for each levels for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){ FLOG(FTic counterTimeLevel); - const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria); + const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeparationCriteria); // for each cells do{ //#pragma omp task default(none) firstprivate(octreeIterator,separationCriteria) private( neighbors) shared(idxLevel) @@ -388,7 +390,7 @@ protected: // There is a maximum of 26 neighbors ContainerClass* neighbors[27]; - const int SizeShape = 3*3*3; + const int SizeShape = P2PExclusionClass::SizeShape; FVector<typename OctreeClass::Iterator> shapes[SizeShape]; typename OctreeClass::Iterator octreeIterator(tree); @@ -397,7 +399,7 @@ protected: // for each leafs do{ const FTreeCoordinate& coord = octreeIterator.getCurrentGlobalCoordinate(); - const int shapePosition = (coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3); + const int shapePosition = P2PExclusionClass::GetShapeIdx(coord); shapes[shapePosition].push(octreeIterator); diff --git a/Src/Core/FFmmAlgorithmThread.hpp b/Src/Core/FFmmAlgorithmThread.hpp index daf2303b06ed3b5b9692c444e924efc084d11035..85c7ffbe83577d2d984d372e39d1c8cf124ac652 100644 --- 
a/Src/Core/FFmmAlgorithmThread.hpp +++ b/Src/Core/FFmmAlgorithmThread.hpp @@ -27,6 +27,7 @@ #include "../Containers/FOctree.hpp" #include "FCoreCommon.hpp" +#include "FP2PExclusion.hpp" #include <omp.h> @@ -45,7 +46,7 @@ * * This class does not deallocate pointers given to its constructor. */ -template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass> +template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass, class P2PExclusionClass = FP2PMiddleExclusion> class FFmmAlgorithmThread : public FAbstractAlgorithm, public FAlgorithmTimers{ OctreeClass* const tree; ///< The octree to work on. KernelClass** kernels; ///< The kernels. @@ -53,7 +54,7 @@ class FFmmAlgorithmThread : public FAbstractAlgorithm, public FAlgorithmTimers{ typename OctreeClass::Iterator* iterArray; int leafsNumber; - static const int SizeShape = 3*3*3; + static const int SizeShape = P2PExclusionClass::SizeShape; int shapeLeaf[SizeShape]; const int MaxThreads; ///< The maximum number of threads. 
@@ -62,7 +63,7 @@ class FFmmAlgorithmThread : public FAbstractAlgorithm, public FAlgorithmTimers{ int userChunkSize; - const int leafLevelSeperationCriteria; + const int leafLevelSeparationCriteria; public: /** Class constructor @@ -79,15 +80,17 @@ public: const int inUserChunkSize = 10, const int inLeafLevelSeperationCriteria = 1) : tree(inTree) , kernels(nullptr), iterArray(nullptr), leafsNumber(0), MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()), - userChunkSize(inUserChunkSize), leafLevelSeperationCriteria(inLeafLevelSeperationCriteria) { + userChunkSize(inUserChunkSize), leafLevelSeparationCriteria(inLeafLevelSeperationCriteria) { FAssertLF(tree, "tree cannot be null"); + FAssertLF(leafLevelSeparationCriteria < 3, "Separation criteria should be < 3"); + FAssertLF(0 < userChunkSize, "Chunk size should be > 0"); this->kernels = new KernelClass*[MaxThreads]; - #pragma omp parallel for schedule(static) - for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){ - #pragma omp critical (InitFFmmAlgorithmThread) + #pragma omp parallel num_threads(MaxThreads) + { + #pragma omp critical (InitFFmmAlgorithmThread) { - this->kernels[idxThread] = new KernelClass(*inKernels); + this->kernels[omp_get_thread_num()] = new KernelClass(*inKernels); } } @@ -138,7 +141,7 @@ protected: do{ ++leafsNumber; const FTreeCoordinate& coord = octreeIterator.getCurrentCell()->getCoordinate(); - ++this->shapeLeaf[(coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3)]; + ++this->shapeLeaf[P2PExclusionClass::GetShapeIdx(coord)]; } while(octreeIterator.moveRight()); iterArray = new typename OctreeClass::Iterator[leafsNumber]; @@ -296,7 +299,7 @@ protected: // for each levels for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){ FLOG(FTic counterTimeLevel); - const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 
1 : leafLevelSeperationCriteria); + const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeparationCriteria); int numberOfCells = 0; // for each cells do{ @@ -439,7 +442,7 @@ protected: //iterArray[leafs] = octreeIterator; //++leafs; const FTreeCoordinate& coord = octreeIterator.getCurrentGlobalCoordinate(); - const int shapePosition = (coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3); + const int shapePosition = P2PExclusionClass::GetShapeIdx(coord); omp_set_lock(&lockShape[shapePosition]); const int positionToWork = startPosAtShape[shapePosition]++; diff --git a/Src/Core/FFmmAlgorithmThreadBalance.hpp b/Src/Core/FFmmAlgorithmThreadBalance.hpp index 3d3afb85923b99bd4bf0a0f1d2162d6d2fb34ad7..002aa4e309f04da1b8c2596c0e18b8925e749b46 100644 --- a/Src/Core/FFmmAlgorithmThreadBalance.hpp +++ b/Src/Core/FFmmAlgorithmThreadBalance.hpp @@ -11,6 +11,7 @@ #include "../Containers/FOctree.hpp" #include "FCoreCommon.hpp" +#include "FP2PExclusion.hpp" #include <omp.h> #include <vector> @@ -29,18 +30,18 @@ * * This class does not deallocate pointers given to its constructor. */ -template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass> +template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass, class P2PExclusionClass = FP2PMiddleExclusion> class FFmmAlgorithmThreadBalance : public FAbstractAlgorithm, public FAlgorithmTimers{ OctreeClass* const tree; ///< The octree to work on. KernelClass** kernels; ///< The kernels. - static const int SizeShape = 3*3*3; + static const int SizeShape = P2PExclusionClass::SizeShape; const int MaxThreads; ///< The maximum number of threads. const int OctreeHeight; ///< The height of the given tree. 
- const int leafLevelSeperationCriteria; + const int leafLevelSeparationCriteria; public: /** Class constructor @@ -57,15 +58,16 @@ public: const int inLeafLevelSeperationCriteria = 1) : tree(inTree) , kernels(nullptr), MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()), - leafLevelSeperationCriteria(inLeafLevelSeperationCriteria) { + leafLevelSeparationCriteria(inLeafLevelSeperationCriteria) { FAssertLF(tree, "tree cannot be null"); + FAssertLF(leafLevelSeparationCriteria < 3, "Separation criteria should be < 3"); this->kernels = new KernelClass*[MaxThreads]; -#pragma omp parallel for schedule(static) - for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){ -#pragma omp critical (InitFFmmAlgorithmThreadBalance) + #pragma omp parallel num_threads(MaxThreads) + { + #pragma omp critical (InitFFmmAlgorithmThreadBalance) { - this->kernels[idxThread] = new KernelClass(*inKernels); + this->kernels[omp_get_thread_num()] = new KernelClass(*inKernels); } } @@ -205,7 +207,7 @@ protected: do{ ++leafsNumber; const FTreeCoordinate& coord = octreeIterator.getCurrentCell()->getCoordinate(); - ++shapeLeaves[(coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3)]; + ++shapeLeaves[P2PExclusionClass::GetShapeIdx(coord)]; } while(octreeIterator.moveRight()); } @@ -346,6 +348,7 @@ protected: workloadBufferThread[omp_get_thread_num()] = new WorkloadTemp[leafsNumber]; } WorkloadTemp* workloadBuffer = workloadBufferThread[omp_get_thread_num()]; + memset(workloadBuffer, 0, sizeof(struct WorkloadTemp)*leafsNumber); // Prepare the P2P const int LeafIndex = OctreeHeight - 1; leafsDataArray.reset(new LeafData[leafsNumber]); @@ -365,7 +368,7 @@ protected: // for each leafs for(int idxLeaf = 0 ; idxLeaf < leafsNumber ; ++idxLeaf){ const FTreeCoordinate& coord = octreeIterator.getCurrentGlobalCoordinate(); - const int shapePosition = (coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3); + const int shapePosition = P2PExclusionClass::GetShapeIdx(coord); const int 
positionToWork = startPosAtShape[shapePosition]++; @@ -542,7 +545,7 @@ protected: // for each levels for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){ - const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria); + const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeparationCriteria); FLOG(FTic counterTimeLevel); FLOG(computationCounter.tic()); #pragma omp parallel diff --git a/Src/Core/FFmmAlgorithmThreadProc.hpp b/Src/Core/FFmmAlgorithmThreadProc.hpp index 649f1a21be80d7a424e6ad58e57f3fca1b6ad765..2218a1282aab1b6bee527817ef498115316b5583 100644 --- a/Src/Core/FFmmAlgorithmThreadProc.hpp +++ b/Src/Core/FFmmAlgorithmThreadProc.hpp @@ -40,6 +40,7 @@ #include <sys/time.h> #include "FCoreCommon.hpp" +#include "FP2PExclusion.hpp" #include <memory> @@ -63,7 +64,7 @@ * --tool=memcheck --leak-check=yes --show-reachable=yes --num-callers=20 * --track-fds=yes ./Tests/testFmmAlgorithmProc ../Data/testLoaderSmall.fma.tmp */ -template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass> +template<class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass, class P2PExclusionClass = FP2PMiddleExclusion> class FFmmAlgorithmThreadProc : public FAbstractAlgorithm, public FAlgorithmTimers { private: OctreeClass* const tree; ///< The octree to work on @@ -82,7 +83,7 @@ private: const int idProcess; ///< Current process id const int OctreeHeight; ///< Tree height - const int leafLevelSeperationCriteria; + const int leafLevelSeparationCriteria; /** An interval is the morton index interval * that a proc uses (i.e. 
it holds data in this interval) */ @@ -150,17 +151,18 @@ public: nbProcess(inComm.processCount()), idProcess(inComm.processId()), OctreeHeight(tree->getHeight()), - leafLevelSeperationCriteria(inLeafLevelSeperationCriteria), + leafLevelSeparationCriteria(inLeafLevelSeperationCriteria), intervals(new Interval[inComm.processCount()]), workingIntervalsPerLevel(new Interval[inComm.processCount() * tree->getHeight()]) { FAssertLF(tree, "tree cannot be null"); + FAssertLF(leafLevelSeparationCriteria < 3, "Separation criteria should be < 3"); this->kernels = new KernelClass*[MaxThreads]; - #pragma omp parallel for schedule(static) - for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){ + #pragma omp parallel num_threads(MaxThreads) + { #pragma omp critical (InitFFmmAlgorithmThreadProc) { - this->kernels[idxThread] = new KernelClass(*inKernels); + this->kernels[omp_get_thread_num()] = new KernelClass(*inKernels); } } @@ -188,7 +190,10 @@ protected: */ void executeCore(const unsigned operationsToProceed) override { // Count leaf - this->numberOfLeafs = 0; +#ifdef SCALFMM_TRACE_ALGO + eztrace_start(); +#endif + this->numberOfLeafs = 0; { Interval myFullInterval; {//Building the interval with the first and last leaves (and count the number of leaves) @@ -260,31 +265,61 @@ protected: workingIntervalsPerLevel, int(sizeof(Interval)) * OctreeHeight, MPI_BYTE, comm.getComm()), __LINE__ ); } Timers[P2MTimer].tic(); +#ifdef SCALFMM_TRACE_ALGO + eztrace_enter_event("P2M", EZTRACE_YELLOW); /* trace only: the P2M timer above is started whether or not tracing is enabled */ +#endif if(operationsToProceed & FFmmP2M) bottomPass(); Timers[P2MTimer].tac(); +#ifdef SCALFMM_TRACE_ALGO + eztrace_leave_event(); + eztrace_enter_event("M2M", EZTRACE_PINK); +#endif + Timers[M2MTimer].tic(); - if(operationsToProceed & FFmmM2M) upwardPass(); - Timers[M2MTimer].tac(); + if(operationsToProceed & FFmmM2M) upwardPass(); + Timers[M2MTimer].tac(); - Timers[M2LTimer].tic(); +#ifdef SCALFMM_TRACE_ALGO + eztrace_leave_event(); + eztrace_enter_event("M2L", EZTRACE_GREEN); +#endif +
+ Timers[M2LTimer].tic(); if(operationsToProceed & FFmmM2L) transferPass(); Timers[M2LTimer].tac(); - Timers[L2LTimer].tic(); + #ifdef SCALFMM_TRACE_ALGO + eztrace_leave_event(); + eztrace_enter_event("L2L", EZTRACE_PINK); +#endif + + Timers[L2LTimer].tic(); if(operationsToProceed & FFmmL2L) downardPass(); Timers[L2LTimer].tac(); - Timers[NearTimer].tic(); +#ifdef SCALFMM_TRACE_ALGO + eztrace_leave_event(); + eztrace_enter_event("L2P+P2P", EZTRACE_BLUE); +#endif + + Timers[NearTimer].tic(); if( (operationsToProceed & FFmmP2P) || (operationsToProceed & FFmmL2P) ) directPass((operationsToProceed & FFmmP2P),(operationsToProceed & FFmmL2P)); Timers[NearTimer].tac(); +#ifdef SCALFMM_TRACE_ALGO + eztrace_leave_event(); + eztrace_stop(); +#endif // delete array delete [] iterArray; - delete [] iterArrayComm; - iterArray = nullptr; + delete [] iterArrayComm; + iterArray = nullptr; iterArrayComm = nullptr; +#ifdef SCALFMM_TRACE_ALGO + eztrace_stop(); +#endif } ///////////////////////////////////////////////////////////////////////////// @@ -626,7 +661,7 @@ protected: // for each levels for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){ - const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria); + const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeparationCriteria); if(!procHasWorkAtLevel(idxLevel, idProcess)){ avoidGotoLeftIterator.moveDown(); @@ -784,7 +819,7 @@ protected: // Now we can compute all the data // for each levels for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){ - const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria); + const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 
1 : leafLevelSeparationCriteria); if(!procHasWorkAtLevel(idxLevel, idProcess)){ avoidGotoLeftIterator.moveDown(); @@ -851,7 +886,7 @@ protected: // compute the second time // for each levels for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){ - const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria); + const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeparationCriteria); if(!procHasWorkAtLevel(idxLevel, idProcess)){ avoidGotoLeftIterator.moveDown(); @@ -1199,7 +1234,7 @@ protected: // init const int LeafIndex = OctreeHeight - 1; - const int SizeShape = 3*3*3; + const int SizeShape = P2PExclusionClass::SizeShape; int shapeLeaf[SizeShape]; memset(shapeLeaf,0,SizeShape*sizeof(int)); @@ -1360,7 +1395,7 @@ protected: myLeafs[idxLeaf] = octreeIterator; const FTreeCoordinate& coord = octreeIterator.getCurrentCell()->getCoordinate(); - const int shape = (coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3); + const int shape = P2PExclusionClass::GetShapeIdx(coord); shapeType[idxLeaf] = shape; ++shapeLeaf[shape]; diff --git a/Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp b/Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp index e2a0ab116633fcc4e5cb503c9ba97e0f52735eb2..ce9690d551b9e3190969d266ebea14e41e2a2377 100644 --- a/Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp +++ b/Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp @@ -38,6 +38,7 @@ #include <omp.h> #include "FCoreCommon.hpp" +#include "FP2PExclusion.hpp" #include <memory> @@ -61,7 +62,7 @@ * --tool=memcheck --leak-check=yes --show-reachable=yes --num-callers=20 --track-fds=yes * ./Tests/testFmmAlgorithmProc ../Data/testLoaderSmall.fma.tmp */ -template<class FReal, class OctreeClass, class CellClass, class ContainerClass, class KernelClass, class LeafClass> +template<class FReal, class OctreeClass, class CellClass, class ContainerClass, class 
KernelClass, class LeafClass, class P2PExclusionClass = FP2PMiddleExclusion> class FFmmAlgorithmThreadProcPeriodic : public FAbstractAlgorithm { OctreeClass* const tree; //< The octree to work on KernelClass** kernels; //< The kernels @@ -83,7 +84,7 @@ class FFmmAlgorithmThreadProcPeriodic : public FAbstractAlgorithm { const int OctreeHeight; - const int leafLevelSeperationCriteria; + const int leafLevelSeparationCriteria; public: struct Interval{ @@ -117,11 +118,11 @@ public: void setKernel(KernelClass*const inKernels){ this->kernels = new KernelClass*[MaxThreads]; - #pragma omp parallel for schedule(static) - for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){ + #pragma omp parallel num_threads(MaxThreads) + { #pragma omp critical (InitFFmmAlgorithmThreadProcPeriodic) { - this->kernels[idxThread] = new KernelClass(*inKernels); + this->kernels[omp_get_thread_num()] = new KernelClass(*inKernels); } } } @@ -146,12 +147,13 @@ public: numberOfLeafs(0), MaxThreads(omp_get_max_threads()), nbProcess(inComm.processCount()), idProcess(inComm.processId()), OctreeHeight(tree->getHeight()), - leafLevelSeperationCriteria(inLeafLevelSeperationCriteria), + leafLevelSeparationCriteria(inLeafLevelSeperationCriteria), intervals(new Interval[inComm.processCount()]), workingIntervalsPerLevel(new Interval[inComm.processCount() * tree->getHeight()]) { FAssertLF(tree, "tree cannot be null"); FAssertLF(-1 <= inUpperLevel, "inUpperLevel cannot be < -1"); + FAssertLF(leafLevelSeparationCriteria < 3, "Separation criteria should be < 3"); FAbstractAlgorithm::setNbLevelsInTree(extendedTreeHeight()); @@ -787,7 +789,7 @@ protected: // Find the M2L neigbors of a cell const int counter = getPeriodicInteractionNeighbors(iterArray[idxCell].getCurrentGlobalCoordinate(), idxLevel, - neighborsIndexes, neighborsPosition, AllDirs, leafLevelSeperationCriteria); + neighborsIndexes, neighborsPosition, AllDirs, leafLevelSeparationCriteria); memset(alreadySent, false, sizeof(bool) * nbProcess); 
bool needOther = false; @@ -913,7 +915,7 @@ protected: for(int idxLevel = 1 ; idxLevel < OctreeHeight ; ++idxLevel ){ const int fackLevel = idxLevel + offsetRealTree; - const int separationCriteria = (idxLevel != OctreeHeight-1 ? 1 : leafLevelSeperationCriteria); + const int separationCriteria = (idxLevel != OctreeHeight-1 ? 1 : leafLevelSeparationCriteria); if(!procHasWorkAtLevel(idxLevel, idProcess)){ avoidGotoLeftIterator.moveDown(); @@ -981,7 +983,7 @@ protected: for(int idxLevel = 1 ; idxLevel < OctreeHeight ; ++idxLevel ){ const int fackLevel = idxLevel + offsetRealTree; - const int separationCriteria = (fackLevel != OctreeHeight-1 ? 1 : leafLevelSeperationCriteria); + const int separationCriteria = (fackLevel != OctreeHeight-1 ? 1 : leafLevelSeparationCriteria); if(!procHasWorkAtLevel(idxLevel, idProcess)){ avoidGotoLeftIterator.moveDown(); @@ -1347,7 +1349,7 @@ protected: // init const int LeafIndex = OctreeHeight - 1; - const int SizeShape = 3*3*3; + const int SizeShape = P2PExclusionClass::SizeShape; int shapeLeaf[SizeShape]; memset(shapeLeaf,0,SizeShape*sizeof(int)); @@ -1510,7 +1512,7 @@ protected: myLeafs[idxLeaf] = octreeIterator; const FTreeCoordinate& coord = octreeIterator.getCurrentCell()->getCoordinate(); - const int shape = (coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3); + const int shape = P2PExclusionClass::GetShapeIdx(coord); shapeType[idxLeaf] = shape; ++shapeLeaf[shape]; diff --git a/Src/Core/FFmmAlgorithmThreadTsm.hpp b/Src/Core/FFmmAlgorithmThreadTsm.hpp index 6bb6a268a25c1f58c012b7909d14d61b0121b213..761bb345a4c5e5886e7c3e463b659fdc60b816e9 100644 --- a/Src/Core/FFmmAlgorithmThreadTsm.hpp +++ b/Src/Core/FFmmAlgorithmThreadTsm.hpp @@ -55,7 +55,7 @@ class FFmmAlgorithmThreadTsm : public FAbstractAlgorithm, public FAlgorithmTimer const int OctreeHeight; - const int leafLevelSeperationCriteria; + const int leafLevelSeparationCriteria; public: /** The constructor need the octree and the kernels used for computation @@ -65,16 
+65,17 @@ public: */ FFmmAlgorithmThreadTsm(OctreeClass* const inTree, KernelClass* const inKernels, const int inLeafLevelSeperationCriteria = 1) : tree(inTree) , kernels(nullptr), iterArray(nullptr), - MaxThreads(omp_get_max_threads()) , OctreeHeight(tree->getHeight()), leafLevelSeperationCriteria(inLeafLevelSeperationCriteria) { + MaxThreads(omp_get_max_threads()) , OctreeHeight(tree->getHeight()), leafLevelSeparationCriteria(inLeafLevelSeperationCriteria) { FAssertLF(tree, "tree cannot be null"); + FAssertLF(leafLevelSeparationCriteria < 3, "Separation criteria should be < 3"); this->kernels = new KernelClass*[MaxThreads]; - #pragma omp parallel for schedule(static) - for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){ - #pragma omp critical (InitFFmmAlgorithmThreadTsm) + #pragma omp parallel num_threads(MaxThreads) + { + #pragma omp critical (InitFFmmAlgorithmThreadTsm) { - this->kernels[idxThread] = new KernelClass(*inKernels); + this->kernels[omp_get_thread_num()] = new KernelClass(*inKernels); } } @@ -250,7 +251,7 @@ protected: // for each levels for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){ FLOG(FTic counterTimeLevel); - const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria); + const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ?
1 : leafLevelSeparationCriteria); int numberOfCells = 0; // for each cells diff --git a/Src/Core/FFmmAlgorithmTsm.hpp b/Src/Core/FFmmAlgorithmTsm.hpp index 80f238d68465a3ec2905c8892ce30797e6ea322e..62f0c8c3be55c45279f247e5ecc8f06e2745ca7f 100644 --- a/Src/Core/FFmmAlgorithmTsm.hpp +++ b/Src/Core/FFmmAlgorithmTsm.hpp @@ -46,7 +46,7 @@ class FFmmAlgorithmTsm : public FAbstractAlgorithm{ const int OctreeHeight; - const int leafLevelSeperationCriteria; + const int leafLevelSeparationCriteria; FLOG(FTic counterTime); //< In case of debug: to count the elapsed time FLOG(FTic computationCounter); //< In case of debug: to count computation time @@ -58,10 +58,11 @@ public: * An assert is launched if one of the arguments is null */ FFmmAlgorithmTsm(OctreeClass* const inTree, KernelClass* const inKernels, const int inLeafLevelSeperationCriteria = 1) - : tree(inTree) , kernels(inKernels) , OctreeHeight(tree->getHeight()), leafLevelSeperationCriteria(inLeafLevelSeperationCriteria){ + : tree(inTree) , kernels(inKernels) , OctreeHeight(tree->getHeight()), leafLevelSeparationCriteria(inLeafLevelSeperationCriteria){ FAssertLF(tree, "tree cannot be null"); FAssertLF(kernels, "kernels cannot be null"); + FAssertLF(leafLevelSeparationCriteria < 3, "Separation criteria should be < 3"); FAbstractAlgorithm::setNbLevelsInTree(tree->getHeight()); @@ -200,7 +201,7 @@ protected: // for each levels for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){ FLOG(FTic counterTimeLevel); - const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria); + const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 
1 : leafLevelSeparationCriteria); // for each cells do{ FLOG(computationCounter.tic()); diff --git a/Src/Core/FP2PExclusion.hpp b/Src/Core/FP2PExclusion.hpp new file mode 100644 index 0000000000000000000000000000000000000000..329f1c42c802b688d179acfe4118dcb71af8e029 --- /dev/null +++ b/Src/Core/FP2PExclusion.hpp @@ -0,0 +1,50 @@ +#ifndef FP2PEXCLUSION_HPP +#define FP2PEXCLUSION_HPP + +#include "../Containers/FTreeCoordinate.hpp" + +/** + * This class is responsible for the separation of the leaves + * using the coloring algorithm. + * In case of classic P2P and mutual interaction, BoxSeparations = 2 should be used. + * For our current mutual P2P it is a little more complicated because we need + * 2 boxes of separation but only in some directions. + */ +template <int BoxSeparations = 2> +class FP2PExclusion{ +public: + static const int BoxesPerDim = (BoxSeparations+1); + static const int SizeShape = BoxesPerDim*BoxesPerDim*BoxesPerDim; + + static int GetShapeIdx(const int inX, const int inY, const int inZ){ + return (inX%BoxesPerDim)*(BoxesPerDim*BoxesPerDim) + (inY%BoxesPerDim)*BoxesPerDim + (inZ%BoxesPerDim); + } + + static int GetShapeIdx(const FTreeCoordinate& coord){ + return GetShapeIdx(coord.getX(), coord.getY(), coord.getZ()); + } +}; + +/** + * Here the formula is related to the octree construction of the neighbors list: + * const int index = (((idxX + 1) * 3) + (idxY +1)) * 3 + idxZ + 1; + * It goes from 0 to 27 (excluded); + * if we loop from 0 to 14, then we need "x" in [0;2[ + * "y" "z" in [0;3[ + */ +class FP2PMiddleExclusion{ +public: + static const int SizeShape = 3*3*2; + + static int GetShapeIdx(const int inX, const int inY, const int inZ){ + return (inX%2)*9 + (inY%3)*3 + (inZ%3); + } + + static int GetShapeIdx(const FTreeCoordinate& coord){ + return GetShapeIdx(coord.getX(), coord.getY(), coord.getZ()); + } +}; + + +#endif // FP2PEXCLUSION_HPP + diff --git a/Src/ScalFmmConfig.h.cmake b/Src/ScalFmmConfig.h.cmake index
d23bddf1d47a78a930b93869698b9e901987c0ae..3450820790230429c52ec9ee75aa9c58225b1072 100644 --- a/Src/ScalFmmConfig.h.cmake +++ b/Src/ScalFmmConfig.h.cmake @@ -86,7 +86,12 @@ /////////////////////////////////////////////////////// #cmakedefine SCALFMM_USE_EZTRACE +#cmakedefine SCALFMM_TRACE_ALGO +#cmakedefine SCALFMM_TRACE_P2P +#cmakedefine SCALFMM_TRACE_P2M #cmakedefine SCALFMM_TRACE_M2L +#cmakedefine SCALFMM_TRACE_L2L +#cmakedefine SCALFMM_TRACE_L2P /////////////////////////////////////////////////////// diff --git a/UTests/utestP2PExclusion.cpp b/UTests/utestP2PExclusion.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ecf03d65206dfc51801c350f83ad46ed882417b7 --- /dev/null +++ b/UTests/utestP2PExclusion.cpp @@ -0,0 +1,126 @@ + +// =================================================================================== +// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Bérenger Bramas, Matthias Messner +// olivier.coulaud@inria.fr, berenger.bramas@inria.fr +// This software is a computer program whose purpose is to compute the FMM. +// +// This software is governed by the CeCILL-C and LGPL licenses and +// abiding by the rules of distribution of free software. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public and CeCILL-C Licenses for more details. +// "http://www.cecill.info". +// "http://www.gnu.org/licenses". 
+// =================================================================================== +#include "FUTester.hpp" + +#include "Core/FP2PExclusion.hpp" +#include "Utils/FMath.hpp" + +#include <memory> + +/** +* This file is a unit test for the FP2PExclusion classes +*/ + + +/** this class tests the P2P exclusion (shape coloring) classes */ +class TestExclusion : public FUTester<TestExclusion> { + const int Size = 100; + + void Exclusion2(){ + const int Width = 2; + std::unique_ptr<int[]> grid(new int[Size*Size*Size]); + for(int idxShape = 0 ; idxShape < FP2PExclusion<Width>::SizeShape ; ++idxShape){ + memset(grid.get(), 0, sizeof(int)*Size*Size*Size); + + for(int idxX = 0 ; idxX < Size ; ++idxX){ + for(int idxY = 0 ; idxY < Size ; ++idxY){ + for(int idxZ = 0 ; idxZ < Size ; ++idxZ){ + if(FP2PExclusion<Width>::GetShapeIdx(idxX,idxY,idxZ) == idxShape){ + for(int idxX_neig = FMath::Max(0,idxX-1) ; idxX_neig < FMath::Min(Size,idxX+1) ; ++idxX_neig){ + for(int idxY_neig = FMath::Max(0,idxY-1) ; idxY_neig < FMath::Min(Size,idxY+1) ; ++idxY_neig){ + for(int idxZ_neig = FMath::Max(0,idxZ-1) ; idxZ_neig < FMath::Min(Size,idxZ+1) ; ++idxZ_neig){ + uassert(grid[(idxX_neig*Size + idxY_neig)*Size + idxZ_neig] == 0); + grid[(idxX_neig*Size + idxY_neig)*Size + idxZ_neig] = 1; /* mark the visited cell so a second visit within the same shape trips the uassert */ + } + } + } + } + } + } + } + } + } + + void Exclusion1(){ + const int Width = 1; + std::unique_ptr<int[]> grid(new int[Size*Size*Size]); + for(int idxShape = 0 ; idxShape < FP2PExclusion<Width>::SizeShape ; ++idxShape){ + memset(grid.get(), 0, sizeof(int)*Size*Size*Size); + + for(int idxX = 0 ; idxX < Size ; ++idxX){ + for(int idxY = 0 ; idxY < Size ; ++idxY){ + for(int idxZ = 0 ; idxZ < Size ; ++idxZ){ + if(FP2PExclusion<Width>::GetShapeIdx(idxX,idxY,idxZ) == idxShape){ + for(int idxX_neig = FMath::Max(0,idxX-1) ; idxX_neig < idxX ; ++idxX_neig){ + for(int idxY_neig = FMath::Max(0,idxY-1) ; idxY_neig < idxY ; ++idxY_neig){ + for(int idxZ_neig = FMath::Max(0,idxZ-1) ; idxZ_neig < idxZ ; ++idxZ_neig){ + 
uassert(grid[(idxX_neig*Size + idxY_neig)*Size + idxZ_neig] == 0); + grid[(idxX_neig*Size + idxY_neig)*Size + idxZ_neig] = 1; /* mark the visited cell so a second visit within the same shape trips the uassert */ + } + } + } + } + } + } + } + } + } + + void Middle(){ + std::unique_ptr<int[]> grid(new int[Size*Size*Size]); + for(int idxShape = 0 ; idxShape < FP2PMiddleExclusion::SizeShape ; ++idxShape){ + memset(grid.get(), 0, sizeof(int)*Size*Size*Size); + + for(int idxX = 0 ; idxX < Size ; ++idxX){ + for(int idxY = 0 ; idxY < Size ; ++idxY){ + for(int idxZ = 0 ; idxZ < Size ; ++idxZ){ + if(FP2PMiddleExclusion::GetShapeIdx(idxX,idxY,idxZ) == idxShape){ + for(int idxX_neig = FMath::Max(0,idxX-1) ; idxX_neig < FMath::Min(Size,idxX+1) ; ++idxX_neig){ + for(int idxY_neig = FMath::Max(0,idxY-1) ; idxY_neig < FMath::Min(Size,idxY+1) ; ++idxY_neig){ + for(int idxZ_neig = FMath::Max(0,idxZ-1) ; idxZ_neig < FMath::Min(Size,idxZ+1) ; ++idxZ_neig){ + const int diffx = idxX_neig-idxX; + const int diffy = idxY_neig-idxY; + const int diffz = idxZ_neig-idxZ; + const int idx = (diffx+1)*9 + (diffy+1)*3 + (diffz+1); + if(idx < 14){ + uassert(grid[(idxX_neig*Size + idxY_neig)*Size + idxZ_neig] == 0); + grid[(idxX_neig*Size + idxY_neig)*Size + idxZ_neig] = 1; /* mark the visited cell so a second visit within the same shape trips the uassert */ + } + } + } + } + } + } + } + } + } + } + + + // set test + void SetTests(){ + AddTest(&TestExclusion::Exclusion2,"Test 2 exclustion"); + AddTest(&TestExclusion::Exclusion1,"Test 1 exclustion"); + AddTest(&TestExclusion::Middle,"Test middle exclustion"); + } +}; + +// You must do this +TestClass(TestExclusion) + + + diff --git a/Utils/noDist/ChebyshevInterpolationCmpAlgo.cpp b/Utils/noDist/ChebyshevInterpolationCmpAlgo.cpp index c6f1c667cdd7418d648f366629d05c3d48cbf5ec..5a039bd88a6574fbfe743805ceec35cd5b9746bc 100644 --- a/Utils/noDist/ChebyshevInterpolationCmpAlgo.cpp +++ b/Utils/noDist/ChebyshevInterpolationCmpAlgo.cpp @@ -177,7 +177,7 @@ int main(int argc, char* argv[]) std::string algoStr = FParameters::getStr(argc,argv,"-algo", "basic"); ForFmmClass algo1(&tree, &kernels, inUserChunckSize); - 
ForBalFmmClass algo4(&tree, &kernels, inUserChunckSize); + ForBalFmmClass algo4(&tree, &kernels); TaskFmmClass algo2(&tree, &kernels ); SectionTaskFmmClass algo3(&tree, &kernels ); @@ -203,8 +203,8 @@ int main(int argc, char* argv[]) time.tic(); // --------------------------------------------- // algo->execute(FFmmNearField); // Here the call of the FMM algorithm - algo->execute(FFmmFarField); // Here the call of the FMM algorithm -// algo->execute(); // Here the call of the FMM algorithm +// algo->execute(FFmmFarField); // Here the call of the FMM algorithm + algo->execute(); // Here the call of the FMM algorithm // --------------------------------------------- time.tac(); std::cout << "Timers Far Field \n"