diff --git a/CMakeModules/morse/find/FindBLAS.cmake b/CMakeModules/morse/find/FindBLAS.cmake index f361172c983f54c384649531ac92a4d6ea12eff8..37fff16a3e136f16fb2ff6968d08b91cca9c1639 100644 --- a/CMakeModules/morse/find/FindBLAS.cmake +++ b/CMakeModules/morse/find/FindBLAS.cmake @@ -3,7 +3,7 @@ # @copyright (c) 2009-2014 The University of Tennessee and The University # of Tennessee Research Foundation. # All rights reserved. -# @copyright (c) 2012-2014 Inria. All rights reserved. +# @copyright (c) 2012-2016 Inria. All rights reserved. # @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. # ### @@ -40,6 +40,7 @@ # are not given as cmake variable: BLAS_DIR, BLAS_INCDIR, BLAS_LIBDIR # For MKL case and if no paths are given as hints, we will try to use the MKLROOT # environment variable +# BLAS_VERBOSE Print some additional information during BLAS libraries detection ########## ### List of vendors (BLA_VENDOR) valid in this module ########## List of vendors (BLA_VENDOR) valid in this module @@ -77,28 +78,6 @@ # (To distribute this file outside of CMake, substitute the full # License text for the above reference.) - -# Set some colors -#if(NOT WIN32) -# string(ASCII 27 Esc) -# set(ColourReset "${Esc}[m") -# set(ColourBold "${Esc}[1m") -# set(Red "${Esc}[31m") -# set(Green "${Esc}[32m") -# set(Yellow "${Esc}[33m") -# set(Blue "${Esc}[34m") -# set(Magenta "${Esc}[35m") -# set(Cyan "${Esc}[36m") -# set(White "${Esc}[37m") -# set(BoldRed "${Esc}[1;31m") -# set(BoldGreen "${Esc}[1;32m") -# set(BoldYellow "${Esc}[1;33m") -# set(BoldBlue "${Esc}[1;34m") -# set(BoldMagenta "${Esc}[1;35m") -# set(BoldCyan "${Esc}[1;36m") -# set(BoldWhite "${Esc}[1;37m") -#endif() - ## Some macros to print status when search for headers and libs # This macro informs why the _lib_to_find file has not been found macro(Print_Find_Library_Blas_Status _libname _lib_to_find) @@ -1252,7 +1231,7 @@ endif (BLA_VENDOR STREQUAL "NAS" OR BLA_VENDOR STREQUAL "All") # Generic BLAS library? if (BLA_VENDOR STREQUAL "Generic" OR BLA_VENDOR STREQUAL "All") - set(BLAS_SEARCH_LIBS "blas;blas_LINUX;blas_MAC;blas_WINDOWS") + set(BLAS_SEARCH_LIBS "blas;blas_LINUX;blas_MAC;blas_WINDOWS;refblas") foreach (SEARCH_LIB ${BLAS_SEARCH_LIBS}) if (BLAS_LIBRARIES) else () diff --git a/CMakeModules/morse/find/FindBLASEXT.cmake b/CMakeModules/morse/find/FindBLASEXT.cmake index 4169efd07382dfaa07d1a212c6b628064f4b6baf..4a1a8c5624e64f2b7e97b7919b42ca570f07d01e 100644 --- a/CMakeModules/morse/find/FindBLASEXT.cmake +++ b/CMakeModules/morse/find/FindBLASEXT.cmake @@ -3,7 +3,7 @@ # @copyright (c) 2009-2014 The University of Tennessee and The University # of Tennessee Research Foundation. # All rights reserved. -# @copyright (c) 2012-2014 Inria. All rights reserved. +# @copyright (c) 2012-2016 Inria. All rights reserved. # @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. # ### @@ -13,8 +13,8 @@ # This module allows to find BLAS libraries by calling the official FindBLAS module # and handles the creation of different library lists whether the user wishes to link # with a sequential BLAS or a multihreaded (BLAS_SEQ_LIBRARIES and BLAS_PAR_LIBRARIES). -# BLAS is detected with a FindBLAS call then if the BLAS vendor is Intel10_64lp or ACML -# then the module tries to find the corresponding multithreaded libraries. +# BLAS is detected with a FindBLAS call then if the BLAS vendor is Intel10_64lp, ACML +# or IBMESSLMT then the module attempts to find the corresponding multithreaded libraries. # # The following variables have been added to manage links with sequential or multithreaded # versions: @@ -28,7 +28,7 @@ # Copyright 2012-2013 Emmanuel Agullo # Copyright 2012-2013 Mathieu Faverge # Copyright 2012 Cedric Castagnede -# Copyright 2013 Florent Pruvost +# Copyright 2013-2016 Florent Pruvost # # Distributed under the OSI-approved BSD License (the "License"); # see accompanying file MORSE-Copyright.txt for details. @@ -132,6 +132,10 @@ if(BLA_VENDOR MATCHES "Intel*") endif() list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") + set(ENV_MKLROOT "$ENV{MKLROOT}") + if (ENV_MKLROOT) + list(APPEND _inc_env "${ENV_MKLROOT}/include") + endif() list(REMOVE_DUPLICATES _inc_env) # find mkl.h inside known include paths diff --git a/CMakeModules/morse/find/FindCBLAS.cmake b/CMakeModules/morse/find/FindCBLAS.cmake index abfdf8db86c66adb276d9797da3109f2bc621cc0..e0c3284df3bd3e96c743d3f8dd54e3d1211d5347 100644 --- a/CMakeModules/morse/find/FindCBLAS.cmake +++ b/CMakeModules/morse/find/FindCBLAS.cmake @@ -3,7 +3,7 @@ # @copyright (c) 2009-2014 The University of Tennessee and The University # of Tennessee Research Foundation. # All rights reserved. -# @copyright (c) 2012-2014 Inria. All rights reserved. +# @copyright (c) 2012-2016 Inria. All rights reserved. # @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. # ### @@ -18,11 +18,6 @@ # CBLAS depends on the following libraries: # - BLAS # -# COMPONENTS are optional libraries LAPACKE could be linked with, -# Use it to drive detection of a specific compilation chain -# COMPONENTS can be some of the following: -# - BLASEXT: to activate detection of BLAS with BLASEXT cmake module -# # This module finds headers and cblas library. # Results are reported in variables: # CBLAS_FOUND - True if headers and requested libraries were found @@ -67,7 +62,7 @@ # Copyright 2012-2013 Emmanuel Agullo # Copyright 2012-2013 Mathieu Faverge # Copyright 2012 Cedric Castagnede -# Copyright 2013 Florent Pruvost +# Copyright 2013-2016 Florent Pruvost # # Distributed under the OSI-approved BSD License (the "License"); # see accompanying file MORSE-Copyright.txt for details. @@ -88,30 +83,12 @@ if (NOT CBLAS_FOUND) endif() -# CBLAS may depend on BLASEXT -# try to find it specified as COMPONENTS during the call -if (CBLAS_FIND_COMPONENTS) - foreach( component ${CBLAS_FIND_COMPONENTS} ) - if(CBLAS_FIND_REQUIRED_${component}) - find_package(${component} REQUIRED) - else() - find_package(${component}) - endif() - if(${component}_FOUND) - set(CBLAS_${component}_FOUND TRUE) - else() - set(CBLAS_${component}_FOUND FALSE) - endif() - endforeach() -endif () - - # CBLAS depends on BLAS anyway, try to find it if (NOT BLAS_FOUND) if(CBLAS_FIND_REQUIRED) - find_package(BLAS REQUIRED) + find_package(BLASEXT REQUIRED) else() - find_package(BLAS) + find_package(BLASEXT) endif() endif() diff --git a/CMakeModules/morse/find/FindCHAMELEON.cmake b/CMakeModules/morse/find/FindCHAMELEON.cmake index 7e20d8223f4aa3f6a45eb74e4f1486134ff0af2e..d9e4415dd264d2cd2a48a3f4ddfd619d0588ed05 100644 --- a/CMakeModules/morse/find/FindCHAMELEON.cmake +++ b/CMakeModules/morse/find/FindCHAMELEON.cmake @@ -3,7 +3,7 @@ # @copyright (c) 2009-2014 The University of Tennessee and The University # of Tennessee Research Foundation. # All rights reserved. -# @copyright (c) 2012-2014 Inria. All rights reserved. +# @copyright (c) 2012-2016 Inria. All rights reserved. # @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. # ### @@ -56,7 +56,7 @@ # Copyright 2012-2013 Emmanuel Agullo # Copyright 2012-2013 Mathieu Faverge # Copyright 2012 Cedric Castagnede -# Copyright 2013 Florent Pruvost +# Copyright 2013-2016 Florent Pruvost # # Distributed under the OSI-approved BSD License (the "License"); # see accompanying file MORSE-Copyright.txt for details. @@ -245,9 +245,9 @@ if( (NOT PKG_CONFIG_EXECUTABLE) OR (PKG_CONFIG_EXECUTABLE AND NOT CHAMELEON_FOUN message(STATUS "Looking for CHAMELEON - Try to detect CBLAS (depends on BLAS)") endif() if (CHAMELEON_FIND_REQUIRED) - find_package(CBLAS REQUIRED COMPONENTS BLASEXT) + find_package(CBLAS REQUIRED) else() - find_package(CBLAS COMPONENTS BLASEXT) + find_package(CBLAS) endif() # CHAMELEON depends on LAPACKE @@ -261,9 +261,9 @@ if( (NOT PKG_CONFIG_EXECUTABLE) OR (PKG_CONFIG_EXECUTABLE AND NOT CHAMELEON_FOUN message(STATUS "Looking for CHAMELEON - Try to detect LAPACKE (depends on LAPACK)") endif() if (CHAMELEON_FIND_REQUIRED) - find_package(LAPACKE REQUIRED COMPONENTS LAPACKEXT) + find_package(LAPACKE REQUIRED) else() - find_package(LAPACKE COMPONENTS LAPACKEXT) + find_package(LAPACKE) endif() # CHAMELEON depends on TMG diff --git a/CMakeModules/morse/find/FindLAPACK.cmake b/CMakeModules/morse/find/FindLAPACK.cmake index c9da7bd9dc8630af8f5ddc5bfb4dc393fd089bf5..59415f4581b8be876bc00632310659e16e721f55 100644 --- a/CMakeModules/morse/find/FindLAPACK.cmake +++ b/CMakeModules/morse/find/FindLAPACK.cmake @@ -3,7 +3,7 @@ # @copyright (c) 2009-2014 The University of Tennessee and The University # of Tennessee Research Foundation. # All rights reserved. -# @copyright (c) 2012-2014 Inria. All rights reserved. +# @copyright (c) 2012-2016 Inria. All rights reserved. # @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. # ### @@ -57,27 +57,6 @@ # License text for the above reference.) -# Set some colors -#if(NOT WIN32) -# string(ASCII 27 Esc) -# set(ColourReset "${Esc}[m") -# set(ColourBold "${Esc}[1m") -# set(Red "${Esc}[31m") -# set(Green "${Esc}[32m") -# set(Yellow "${Esc}[33m") -# set(Blue "${Esc}[34m") -# set(Magenta "${Esc}[35m") -# set(Cyan "${Esc}[36m") -# set(White "${Esc}[37m") -# set(BoldRed "${Esc}[1;31m") -# set(BoldGreen "${Esc}[1;32m") -# set(BoldYellow "${Esc}[1;33m") -# set(BoldBlue "${Esc}[1;34m") -# set(BoldMagenta "${Esc}[1;35m") -# set(BoldCyan "${Esc}[1;36m") -# set(BoldWhite "${Esc}[1;37m") -#endif() - ## Some macros to print status when search for headers and libs # This macro informs why the _lib_to_find file has not been found macro(Print_Find_Library_Blas_Status _libname _lib_to_find) @@ -319,9 +298,9 @@ set(LAPACK95_LIBRARIES) if (NOT BLAS_FOUND) if(LAPACK_FIND_QUIETLY OR NOT LAPACK_FIND_REQUIRED) - find_package(BLAS) + find_package(BLASEXT) else(LAPACK_FIND_QUIETLY OR NOT LAPACK_FIND_REQUIRED) - find_package(BLAS REQUIRED) + find_package(BLASEXT REQUIRED) endif(LAPACK_FIND_QUIETLY OR NOT LAPACK_FIND_REQUIRED) endif () diff --git a/CMakeModules/morse/find/FindLAPACKE.cmake b/CMakeModules/morse/find/FindLAPACKE.cmake index b6ea2571bfa26128e63a9797624edbfaf542c345..3e7686870e446b6281b796828017785ef70d350f 100644 --- a/CMakeModules/morse/find/FindLAPACKE.cmake +++ b/CMakeModules/morse/find/FindLAPACKE.cmake @@ -3,7 +3,7 @@ # @copyright (c) 2009-2014 The University of Tennessee and The University # of Tennessee Research Foundation. # All rights reserved. -# @copyright (c) 2012-2014 Inria. All rights reserved. +# @copyright (c) 2012-2016 Inria. All rights reserved. # @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. # ### @@ -18,11 +18,6 @@ # LAPACKE depends on the following libraries: # - LAPACK # -# COMPONENTS are optional libraries LAPACKE could be linked with, -# Use it to drive detection of a specific compilation chain -# COMPONENTS available: -# - LAPACKEXT: to activate detection of LAPACK with LAPACKEXT cmake module -# # This module finds headers and lapacke library. # Results are reported in variables: # LAPACKE_FOUND - True if headers and requested libraries were found @@ -54,7 +49,7 @@ # Copyright 2012-2013 Emmanuel Agullo # Copyright 2012-2013 Mathieu Faverge # Copyright 2012 Cedric Castagnede -# Copyright 2013 Florent Pruvost +# Copyright 2013-2016 Florent Pruvost # # Distributed under the OSI-approved BSD License (the "License"); # see accompanying file MORSE-Copyright.txt for details. @@ -73,29 +68,12 @@ if (NOT LAPACKE_FOUND) endif() endif() -# LAPACKE depends on LAPACKEXT -# try to find it specified as COMPONENTS during the call -if (LAPACKE_FIND_COMPONENTS) - foreach( component ${LAPACKE_FIND_COMPONENTS} ) - if(LAPACKE_FIND_REQUIRED_${component}) - find_package(${component} REQUIRED) - else() - find_package(${component}) - endif() - if(${component}_FOUND) - set(LAPACKE_${component}_FOUND TRUE) - else() - set(LAPACKE_${component}_FOUND FALSE) - endif() - endforeach() -endif () - # LAPACKE depends on LAPACK anyway, try to find it if (NOT LAPACK_FOUND) if(LAPACKE_FIND_REQUIRED) - find_package(LAPACK REQUIRED) + find_package(LAPACKEXT REQUIRED) else() - find_package(LAPACK) + find_package(LAPACKEXT) endif() endif() diff --git a/CMakeModules/morse/find/FindLAPACKEXT.cmake b/CMakeModules/morse/find/FindLAPACKEXT.cmake index c494fb7e990d61b2cd16d7ffba354fc32c1e22f2..4270ce8dea4e8b32845c36f17df2062f6921a3d5 100644 --- a/CMakeModules/morse/find/FindLAPACKEXT.cmake +++ b/CMakeModules/morse/find/FindLAPACKEXT.cmake @@ -76,6 +76,10 @@ if(BLA_VENDOR MATCHES "Intel*") endif() list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") + set(ENV_MKLROOT "$ENV{MKLROOT}") + if (ENV_MKLROOT) + list(APPEND _inc_env "${ENV_MKLROOT}/include") + endif() list(REMOVE_DUPLICATES _inc_env) if (BLAS_DIR) diff --git a/CMakeModules/morse/find/FindMAGMA.cmake b/CMakeModules/morse/find/FindMAGMA.cmake index e8bdd072ddf18c8ccf7cd0d2282efe3919278218..54d02d4b93a3cf82f9f580184d2e9106f1c6a871 100644 --- a/CMakeModules/morse/find/FindMAGMA.cmake +++ b/CMakeModules/morse/find/FindMAGMA.cmake @@ -3,7 +3,7 @@ # @copyright (c) 2009-2014 The University of Tennessee and The University # of Tennessee Research Foundation. # All rights reserved. -# @copyright (c) 2012-2014 Inria. All rights reserved. +# @copyright (c) 2012-2016 Inria. All rights reserved. # @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. # ### @@ -48,7 +48,7 @@ # Copyright 2012-2013 Emmanuel Agullo # Copyright 2012-2013 Mathieu Faverge # Copyright 2012 Cedric Castagnede -# Copyright 2013 Florent Pruvost +# Copyright 2013-2016 Florent Pruvost # # Distributed under the OSI-approved BSD License (the "License"); # see accompanying file MORSE-Copyright.txt for details. @@ -86,9 +86,9 @@ endif() # MAGMA depends on LAPACK anyway, try to find it if (NOT LAPACK_FOUND) if(MAGMA_FIND_REQUIRED) - find_package(LAPACK REQUIRED) + find_package(LAPACKEXT REQUIRED) else() - find_package(LAPACK) + find_package(LAPACKEXT) endif() endif() # MAGMA depends on CBLAS anyway, try to find it diff --git a/CMakeModules/morse/find/FindMUMPS.cmake b/CMakeModules/morse/find/FindMUMPS.cmake index 7e7afe7e0b2876dfb57d2cc8b5b2213fb067107e..5cca0d649a11b185f690194dbab2b0375a61fdea 100644 --- a/CMakeModules/morse/find/FindMUMPS.cmake +++ b/CMakeModules/morse/find/FindMUMPS.cmake @@ -112,33 +112,6 @@ if (NOT MUMPS_FIND_QUIETLY) message(STATUS "Looking for MUMPS - PkgConfig not used") endif() -# Dependencies detection -# ---------------------- - -# Add system library paths to search lib -# -------------------------------------- -unset(_lib_env) -set(ENV_MUMPS_LIBDIR "$ENV{MUMPS_LIBDIR}") -if(ENV_MUMPS_LIBDIR) - list(APPEND _lib_env "${ENV_MUMPS_LIBDIR}") -elseif(ENV_MUMPS_DIR) - list(APPEND _lib_env "${ENV_MUMPS_DIR}") - list(APPEND _lib_env "${ENV_MUMPS_DIR}/lib") -else() - if(WIN32) - string(REPLACE ":" ";" _lib_env "$ENV{LIB}") - else() - if(APPLE) - string(REPLACE ":" ";" _lib_env "$ENV{DYLD_LIBRARY_PATH}") - else() - string(REPLACE ":" ";" _lib_env "$ENV{LD_LIBRARY_PATH}") - endif() - list(APPEND _lib_env "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}") - list(APPEND _lib_env "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") - endif() -endif() -list(REMOVE_DUPLICATES _lib_env) - # Required dependencies # --------------------- @@ -212,14 +185,14 @@ endif (NOT SCALAPACK_FOUND AND MUMPS_LOOK_FOR_MPI) # MUMPS may depends on SCOTCH #---------------------------- -if (NOT SCOTCH_FOUND AND MUMPS_LOOK_FOR_SCOTCH) +if (MUMPS_LOOK_FOR_SCOTCH) if (NOT MUMPS_FIND_QUIETLY) - message(STATUS "Looking for MUMPS - Try to detect SCOTCH") + message(STATUS "Looking for MUMPS - Try to detect SCOTCH with esmumps") endif() if (MUMPS_FIND_REQUIRED AND MUMPS_FIND_REQUIRED_SCOTCH) - find_package(SCOTCH REQUIRED) + find_package(SCOTCH REQUIRED COMPONENTS ESMUMPS) else() - find_package(SCOTCH) + find_package(SCOTCH COMPONENTS ESMUMPS) endif() endif() @@ -240,44 +213,92 @@ endif() # Looking for MUMPS # ----------------- +# Add system include paths to search include +# ------------------------------------------ +unset(_inc_env) +set(ENV_MUMPS_DIR "$ENV{MUMPS_DIR}") +set(ENV_MUMPS_INCDIR "$ENV{MUMPS_INCDIR}") +if(ENV_MUMPS_INCDIR) + list(APPEND _inc_env "${ENV_MUMPS_INCDIR}") +elseif(ENV_MUMPS_DIR) + list(APPEND _inc_env "${ENV_MUMPS_DIR}") + list(APPEND _inc_env "${ENV_MUMPS_DIR}/include") + list(APPEND _inc_env "${ENV_MUMPS_DIR}/include/mumps") +else() + if(WIN32) + string(REPLACE ":" ";" _inc_env "$ENV{INCLUDE}") + else() + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{C_INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{CPATH}") + list(APPEND _inc_env "${_path_env}") + string(REPLACE ":" ";" _path_env "$ENV{INCLUDE_PATH}") + list(APPEND _inc_env "${_path_env}") + endif() +endif() +list(APPEND _inc_env "${CMAKE_PLATFORM_IMPLICIT_INCLUDE_DIRECTORIES}") +list(APPEND _inc_env "${CMAKE_C_IMPLICIT_INCLUDE_DIRECTORIES}") +list(REMOVE_DUPLICATES _inc_env) + +# Add system library paths to search lib +# -------------------------------------- +unset(_lib_env) +set(ENV_MUMPS_LIBDIR "$ENV{MUMPS_LIBDIR}") +if(ENV_MUMPS_LIBDIR) + list(APPEND _lib_env "${ENV_MUMPS_LIBDIR}") +elseif(ENV_MUMPS_DIR) + list(APPEND _lib_env "${ENV_MUMPS_DIR}") + list(APPEND _lib_env "${ENV_MUMPS_DIR}/lib") +else() + if(WIN32) + string(REPLACE ":" ";" _lib_env "$ENV{LIB}") + else() + if(APPLE) + string(REPLACE ":" ";" _lib_env "$ENV{DYLD_LIBRARY_PATH}") + else() + string(REPLACE ":" ";" _lib_env "$ENV{LD_LIBRARY_PATH}") + endif() + list(APPEND _lib_env "${CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES}") + list(APPEND _lib_env "${CMAKE_C_IMPLICIT_LINK_DIRECTORIES}") + endif() +endif() +list(REMOVE_DUPLICATES _lib_env) + # Looking for include # ------------------- # Try to find the mumps header in the given path # ---------------------------------------------- + +# create list of headers to find +list(APPEND MUMPS_hdrs_to_find "smumps_c.h;dmumps_c.h;cmumps_c.h;zmumps_c.h") + # call cmake macro to find the header path -if(MUMPS_DIR) - set(MUMPS_smumps_c.h_DIRS "MUMPS_smumps_c.h_DIRS-NOTFOUND") - find_path(MUMPS_smumps_c.h_DIRS - NAMES smumps_c.h - HINTS "${MUMPS_DIR}" "$ENV{MUMPS_DIR}" - PATH_SUFFIXES "include") - set(MUMPS_dmumps_c.h_DIRS "MUMPS_dmumps_c.h_DIRS-NOTFOUND") - find_path(MUMPS_dmumps_c.h_DIRS - NAMES dmumps_c.h - HINTS "${MUMPS_DIR}" "$ENV{MUMPS_DIR}" - PATH_SUFFIXES "include") - set(MUMPS_cmumps_c.h_DIRS "MUMPS_cmumps_c.h_DIRS-NOTFOUND") - find_path(MUMPS_cmumps_c.h_DIRS - NAMES cmumps_c.h - HINTS "${MUMPS_DIR}" "$ENV{MUMPS_DIR}" - PATH_SUFFIXES "include") - set(MUMPS_zmumps_c.h_DIRS "MUMPS_zmumps_c.h_DIRS-NOTFOUND") - find_path(MUMPS_zmumps_c.h_DIRS - NAMES zmumps_c.h - HINTS "${MUMPS_DIR}" "$ENV{MUMPS_DIR}" - PATH_SUFFIXES "include") +if(MUMPS_INCDIR) + foreach(mumps_hdr ${MUMPS_hdrs_to_find}) + set(MUMPS_${mumps_hdr}_DIRS "MUMPS_${mumps_hdr}_INCLUDE_DIRS-NOTFOUND") + find_path(MUMPS_${mumps_hdr}_DIRS + NAMES ${mumps_hdr} + HINTS ${MUMPS_INCDIR}) + endforeach() else() - if (MUMPS_FIND_REQUIRED) - message(FATAL_ERROR "Looking for mumps -- MUMPS_DIR is not set, to find" - " MUMPS please set MUMPS_DIR, the path to MUMPS installation where" - " sub-directories include/ and lib/ are located") + if(MUMPS_DIR) + set(MUMPS_${mumps_hdr}_DIRS "MUMPS_${mumps_hdr}_INCLUDE_DIRS-NOTFOUND") + foreach(mumps_hdr ${MUMPS_hdrs_to_find}) + find_path(MUMPS_${mumps_hdr}_DIRS + NAMES ${mumps_hdr} + HINTS ${MUMPS_DIR} + PATH_SUFFIXES "include") + endforeach() else() - if(NOT MUMPS_FIND_QUIETLY) - message(STATUS "Looking for mumps -- MUMPS_DIR is not set, to find" - " MUMPS please set MUMPS_DIR, the path to MUMPS installation where" - " sub-directories include/ and lib/ are located") - endif() + foreach(mumps_hdr ${MUMPS_hdrs_to_find}) + set(MUMPS_${mumps_hdr}_DIRS "MUMPS_${mumps_hdr}_INCLUDE_DIRS-NOTFOUND") + find_path(MUMPS_${mumps_hdr}_DIRS + NAMES ${mumps_hdr} + HINTS ${_inc_env}) + endforeach() endif() endif() @@ -329,57 +350,48 @@ endif() # Looking for lib # --------------- -# Try to find the mumps lib in the given paths -# -------------------------------------------- +# create list of libs to find +set(MUMPS_libs_to_find "mumps_common;pord") +if (MUMPS_LOOK_FOR_SEQ) + list(APPEND MUMPS_libs_to_find "mpiseq") +endif() +if(MUMPS_PREC_S) + list(APPEND MUMPS_libs_to_find "smumps") +endif() +if(MUMPS_PREC_D) + list(APPEND MUMPS_libs_to_find "dmumps") +endif() +if(MUMPS_PREC_C) + list(APPEND MUMPS_libs_to_find "cmumps") +endif() +if(MUMPS_PREC_Z) + list(APPEND MUMPS_libs_to_find "zmumps") +endif() # call cmake macro to find the lib path -if(MUMPS_DIR) - set(MUMPS_smumps_LIBRARY "MUMPS_smumps_LIBRARY-NOTFOUND") - find_library(MUMPS_smumps_LIBRARY - NAMES smumps - HINTS "${MUMPS_DIR}" "$ENV{MUMPS_DIR}" - PATH_SUFFIXES lib) - set(MUMPS_dmumps_LIBRARY "MUMPS_dmumps_LIBRARY-NOTFOUND") - find_library(MUMPS_dmumps_LIBRARY - NAMES dmumps - HINTS "${MUMPS_DIR}" "$ENV{MUMPS_DIR}" - PATH_SUFFIXES lib) - set(MUMPS_cmumps_LIBRARY "MUMPS_cmumps_LIBRARY-NOTFOUND") - find_library(MUMPS_cmumps_LIBRARY - NAMES cmumps - HINTS "${MUMPS_DIR}" "$ENV{MUMPS_DIR}" - PATH_SUFFIXES lib) - set(MUMPS_zmumps_LIBRARY "MUMPS_zmumps_LIBRARY-NOTFOUND") - find_library(MUMPS_zmumps_LIBRARY - NAMES zmumps - HINTS "${MUMPS_DIR}" "$ENV{MUMPS_DIR}" - PATH_SUFFIXES lib) - set(MUMPS_mumps_common_LIBRARY "MUMPS_mumps_common_LIBRARY-NOTFOUND") - find_library(MUMPS_mumps_common_LIBRARY - NAMES mumps_common - HINTS "${MUMPS_DIR}" "$ENV{MUMPS_DIR}" - PATH_SUFFIXES lib) - set(MUMPS_mpiseq_LIBRARY "MUMPS_mpiseq_LIBRARY-NOTFOUND") - find_library(MUMPS_mpiseq_LIBRARY - NAMES mpiseq - HINTS "${MUMPS_DIR}" "$ENV{MUMPS_DIR}" - PATH_SUFFIXES libseq) - set(MUMPS_pord_LIBRARY "MUMPS_pord_LIBRARY-NOTFOUND") - find_library(MUMPS_pord_LIBRARY - NAMES pord - HINTS "${MUMPS_DIR}" "$ENV{MUMPS_DIR}" - PATH_SUFFIXES lib) +if(MUMPS_LIBDIR) + foreach(mumps_lib ${MUMPS_libs_to_find}) + set(MUMPS_${mumps_lib}_LIBRARY "MUMPS_${mumps_lib}_LIBRARY-NOTFOUND") + find_library(MUMPS_${mumps_lib}_LIBRARY + NAMES ${mumps_lib} + HINTS ${MUMPS_LIBDIR}) + endforeach() else() - if (MUMPS_FIND_REQUIRED) - message(FATAL_ERROR "Looking for mumps -- MUMPS_DIR is not set, to find" - " MUMPS please set MUMPS_DIR, the path to MUMPS installation where" - " sub-directories include/ and lib/ are located") + if(MUMPS_DIR) + foreach(mumps_lib ${MUMPS_libs_to_find}) + set(MUMPS_${mumps_lib}_LIBRARY "MUMPS_${mumps_lib}_LIBRARY-NOTFOUND") + find_library(MUMPS_${mumps_lib}_LIBRARY + NAMES ${mumps_lib} + HINTS ${MUMPS_DIR} + PATH_SUFFIXES lib lib32 lib64) + endforeach() else() - if(NOT MUMPS_FIND_QUIETLY) - message(STATUS "Looking for mumps -- MUMPS_DIR is not set, to find" - " MUMPS please set MUMPS_DIR, the path to MUMPS installation where" - " sub-directories include/ and lib/ are located") - endif() + foreach(mumps_lib ${MUMPS_libs_to_find}) + set(MUMPS_${mumps_lib}_LIBRARY "MUMPS_${mumps_lib}_LIBRARY-NOTFOUND") + find_library(MUMPS_${mumps_lib}_LIBRARY + NAMES ${mumps_lib} + HINTS ${_lib_env}) + endforeach() endif() endif() diff --git a/CMakeModules/morse/find/FindPTSCOTCH.cmake b/CMakeModules/morse/find/FindPTSCOTCH.cmake index d35835475581387ee2bda1e9cfaa691463042154..5e715f7510e0c7bf7af4338eb4849b7b0d9da407 100644 --- a/CMakeModules/morse/find/FindPTSCOTCH.cmake +++ b/CMakeModules/morse/find/FindPTSCOTCH.cmake @@ -62,7 +62,19 @@ if (NOT PTSCOTCH_FOUND) endif() endif() -# PTSCOTCH may depend on Threads, try to find it +# Set the version to find +set(PTSCOTCH_LOOK_FOR_ESMUMPS OFF) + +if( PTSCOTCH_FIND_COMPONENTS ) + foreach( component ${PTSCOTCH_FIND_COMPONENTS} ) + if (${component} STREQUAL "ESMUMPS") + # means we look for esmumps library + set(PTSCOTCH_LOOK_FOR_ESMUMPS ON) + endif() + endforeach() +endif() + +# PTSCOTCH depends on Threads, try to find it if (NOT THREADS_FOUND) if (PTSCOTCH_FIND_REQUIRED) find_package(Threads REQUIRED) @@ -71,7 +83,7 @@ if (NOT THREADS_FOUND) endif() endif() -# PTSCOTCH may depend on MPI, try to find it +# PTSCOTCH depends on MPI, try to find it if (NOT MPI_FOUND) if (PTSCOTCH_FIND_REQUIRED) find_package(MPI REQUIRED) @@ -180,7 +192,12 @@ list(REMOVE_DUPLICATES _lib_env) # Try to find the ptscotch lib in the given paths # ---------------------------------------------- -set(PTSCOTCH_libs_to_find "ptscotch;scotch;scotcherrexit") +set(PTSCOTCH_libs_to_find "ptscotch;ptscotcherr") +if (PTSCOTCH_LOOK_FOR_ESMUMPS) + list(INSERT PTSCOTCH_libs_to_find 0 "ptesmumps") + list(APPEND PTSCOTCH_libs_to_find "esmumps" ) +endif() +list(APPEND PTSCOTCH_libs_to_find "scotch;scotcherr") # call cmake macro to find the lib path if(PTSCOTCH_LIBDIR) diff --git a/CMakeModules/morse/find/FindSCALAPACK.cmake b/CMakeModules/morse/find/FindSCALAPACK.cmake index 0c1863132f0d26b560d40f7c092860ae00dc601b..4eed59d50a6ad557a460d3f52eed6335d9501a57 100644 --- a/CMakeModules/morse/find/FindSCALAPACK.cmake +++ b/CMakeModules/morse/find/FindSCALAPACK.cmake @@ -3,7 +3,7 @@ # @copyright (c) 2009-2014 The University of Tennessee and The University # of Tennessee Research Foundation. # All rights reserved. -# @copyright (c) 2012-2014 Inria. All rights reserved. +# @copyright (c) 2012-2016 Inria. All rights reserved. # @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. # ### @@ -52,27 +52,6 @@ # License text for the above reference.) -# Set some colors -if(NOT WIN32) - string(ASCII 27 Esc) - set(ColourReset "${Esc}[m") - set(ColourBold "${Esc}[1m") - set(Red "${Esc}[31m") - set(Green "${Esc}[32m") - set(Yellow "${Esc}[33m") - set(Blue "${Esc}[34m") - set(Magenta "${Esc}[35m") - set(Cyan "${Esc}[36m") - set(White "${Esc}[37m") - set(BoldRed "${Esc}[1;31m") - set(BoldGreen "${Esc}[1;32m") - set(BoldYellow "${Esc}[1;33m") - set(BoldBlue "${Esc}[1;34m") - set(BoldMagenta "${Esc}[1;35m") - set(BoldCyan "${Esc}[1;36m") - set(BoldWhite "${Esc}[1;37m") -endif() - ## Some macros to print status when search for headers and libs # This macro informs why the _lib_to_find file has not been found macro(Print_Find_Library_Blas_Status _libname _lib_to_find) @@ -309,17 +288,17 @@ set(SCALAPACK95_LIBRARIES) if (NOT BLAS_FOUND) if(SCALAPACK_FIND_QUIETLY OR NOT SCALAPACK_FIND_REQUIRED) - find_package(BLAS) + find_package(BLASEXT) else() - find_package(BLAS REQUIRED) + find_package(BLASEXT REQUIRED) endif() endif () if (NOT LAPACK_FOUND) if(SCALAPACK_FIND_QUIETLY OR NOT SCALAPACK_FIND_REQUIRED) - find_package(LAPACK) + find_package(LAPACKEXT) else() - find_package(LAPACK REQUIRED) + find_package(LAPACKEXT REQUIRED) endif() endif () diff --git a/CMakeModules/morse/find/FindSCOTCH.cmake b/CMakeModules/morse/find/FindSCOTCH.cmake index 951958cdd7bac320d501e9ce559529f506b15094..3bec5b6771c8201bb252721992912febe4b62542 100644 --- a/CMakeModules/morse/find/FindSCOTCH.cmake +++ b/CMakeModules/morse/find/FindSCOTCH.cmake @@ -54,6 +54,18 @@ if (NOT SCOTCH_FOUND) endif() endif() +# Set the version to find +set(SCOTCH_LOOK_FOR_ESMUMPS OFF) + +if( SCOTCH_FIND_COMPONENTS ) + foreach( component ${SCOTCH_FIND_COMPONENTS} ) + if (${component} STREQUAL "ESMUMPS") + # means we look for esmumps library + set(SCOTCH_LOOK_FOR_ESMUMPS ON) + endif() + endforeach() +endif() + # SCOTCH may depend on Threads, try to find it if (NOT THREADS_FOUND) if (SCOTCH_FIND_REQUIRED) @@ -164,6 +176,9 @@ list(REMOVE_DUPLICATES _lib_env) # ---------------------------------------------- set(SCOTCH_libs_to_find "scotch;scotcherrexit") +if (SCOTCH_LOOK_FOR_ESMUMPS) + list(INSERT SCOTCH_libs_to_find 0 "esmumps") +endif() # call cmake macro to find the lib path if(SCOTCH_LIBDIR) diff --git a/CMakeModules/morse/find/FindSTARPU.cmake b/CMakeModules/morse/find/FindSTARPU.cmake index a2b1ed209b7444fc4c10f31d248cd2d78972795e..cc89bcbf20e38bff1069f73a56da45ebdea9fff3 100644 --- a/CMakeModules/morse/find/FindSTARPU.cmake +++ b/CMakeModules/morse/find/FindSTARPU.cmake @@ -163,9 +163,9 @@ endif() # STARPU may depend on BLAS, try to find it if (NOT BLAS_FOUND AND STARPU_LOOK_FOR_BLAS) if (STARPU_FIND_REQUIRED AND STARPU_FIND_REQUIRED_BLAS) - find_package(BLAS REQUIRED) + find_package(BLASEXT REQUIRED) else() - find_package(BLAS) + find_package(BLASEXT) endif() endif() diff --git a/CMakeModules/morse/find/FindTMG.cmake b/CMakeModules/morse/find/FindTMG.cmake index 9ded8c81914f61dd62718112205b279ff9b6d539..11923b911ac6d413c75db5415fb493a5845b7d74 100644 --- a/CMakeModules/morse/find/FindTMG.cmake +++ b/CMakeModules/morse/find/FindTMG.cmake @@ -3,7 +3,7 @@ # @copyright (c) 2009-2014 The University of Tennessee and The University # of Tennessee Research Foundation. # All rights reserved. -# @copyright (c) 2012-2014 Inria. All rights reserved. +# @copyright (c) 2012-2016 Inria. All rights reserved. # @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved. # ### @@ -38,7 +38,7 @@ # Copyright 2012-2013 Emmanuel Agullo # Copyright 2012-2013 Mathieu Faverge # Copyright 2012 Cedric Castagnede -# Copyright 2013 Florent Pruvost +# Copyright 2013-2016 Florent Pruvost # # Distributed under the OSI-approved BSD License (the "License"); # see accompanying file MORSE-Copyright.txt for details. @@ -70,9 +70,9 @@ endif (NOT _LANGUAGES_ MATCHES Fortran) # TMG depends on LAPACK anyway, try to find it if (NOT LAPACK_FOUND) if(TMG_FIND_REQUIRED) - find_package(LAPACK REQUIRED) + find_package(LAPACKEXT REQUIRED) else() - find_package(LAPACK) + find_package(LAPACKEXT) endif() endif() diff --git a/Doc/Site_dox/FExamples.dox b/Doc/Site_dox/FExamples.dox index d7a76eb34aef175fa46dd1377598ec4226581482..0200185e4f52699a9110f3fb21d45407003d114a 100644 --- a/Doc/Site_dox/FExamples.dox +++ b/Doc/Site_dox/FExamples.dox @@ -57,7 +57,7 @@ * particles in a unit cube and store the particle in a file with the FMA format: \code{.cpp} - * ./Exemple/Release/generateDistributions -N 2000000 -unitcube -filename my2kkpartfile.fma + * ./Examples/Release/generateDistributions -N 2000000 -unitcube -fout my2kkpartfile.fma \endcode Which create a file called my2kkpartfile.fma. diff --git a/Examples/RotationMPIFMM.cpp b/Examples/RotationMPIFMM.cpp index 50084eb8611fec973cd27444bcf7d489389b26e6..978df5f73bcc22b128da2252d278a55f60685ced 100644 --- a/Examples/RotationMPIFMM.cpp +++ b/Examples/RotationMPIFMM.cpp @@ -164,6 +164,8 @@ int main(int argc, char* argv[]) tree.getBoxWidth(), tree.getHeight(), &finalParticles,&balancer); + std::cout << "Local nb particles after sort " + << finalParticles.getSize() << std::endl; for(FSize idx = 0 ; idx < finalParticles.getSize(); ++idx){ tree.insert(finalParticles[idx].position,finalParticles[idx].indexInFile,finalParticles[idx].physicalValue); } diff --git a/Examples/fuseDistributions.cpp b/Examples/fuseDistributions.cpp index d5a6535cc3aa4e249836e23ace2db64b856aed39..094b261dc8363ee59849f1758b51f128798ae191 100644 --- a/Examples/fuseDistributions.cpp +++ b/Examples/fuseDistributions.cpp @@ -130,6 +130,10 @@ std::vector<distribution> subparse_file(const std::vector<std::string>& args, st ++i; } + if(i < args.size() && args[i] == "--file") { + --i; + } + if(gx > 1 || gy > 1 || gz > 1) { // Compute offset of lowest left grid offset diff --git a/Licence.txt b/Licence.txt index 7be5457ef5d7c4c4357d70b515d5019756b5b8ef..dbaa609a5fcf69ca7343bdb582a3b71f374f3a33 100644 --- a/Licence.txt +++ b/Licence.txt @@ -4,6 +4,9 @@ the more restrictive has to be used. ScalFmm est régi par la licence CeCILL-C & LGPL, en cas de conflit la plus restrictive prime. +Folders under Addons might have separate Licence, in such case +one can find a dedicated Licence file where appropriate. + ////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////// diff --git a/Src/BalanceTree/FEqualize.hpp b/Src/BalanceTree/FEqualize.hpp index b0c46806c548a4df08200c0075bf623a01634612..95caba72ad4e7dc08805001ded2feb3a0e3f12a7 100644 --- a/Src/BalanceTree/FEqualize.hpp +++ b/Src/BalanceTree/FEqualize.hpp @@ -81,7 +81,10 @@ public: pack.elementTo = Min(allObjectives[idxProc].second , myCurrentInterval.second) - myCurrentInterval.first; // Next time give from the previous end currentElement = pack.elementTo; - packToSend.push_back(pack); + + if(pack.elementTo - pack.elementFrom != 0){ + packToSend.push_back(pack); + } // Progress idxProc += 1; } diff --git a/Src/BalanceTree/FPartitionsMapping.hpp b/Src/BalanceTree/FPartitionsMapping.hpp new file mode 100644 index 0000000000000000000000000000000000000000..50f2525978cd28204f038afa9d8fb062f0f32336 --- /dev/null +++ b/Src/BalanceTree/FPartitionsMapping.hpp @@ -0,0 +1,316 @@ +// =================================================================================== +// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Bérenger Bramas, Matthias Messner +// olivier.coulaud@inria.fr, berenger.bramas@inria.fr +// This software is a computer program whose purpose is to compute the FMM. +// +// This software is governed by the CeCILL-C and LGPL licenses and +// abiding by the rules of distribution of free software. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public and CeCILL-C Licenses for more details. +// "http://www.cecill.info". +// "http://www.gnu.org/licenses". +// =================================================================================== +#ifndef FPARTITIONSMAPPING_HPP +#define FPARTITIONSMAPPING_HPP + +#include "Utils/FGlobal.hpp" +#include "Utils/FMpi.hpp" +#include "Containers/FVector.hpp" +#include "FLeafBalance.hpp" +#include "Files/FMpiTreeBuilder.hpp" + +template <class FReal> +class FPartitionsMapping { +protected: + FMpi::FComm comm; + + //! The number of particles from the initial decomposition + FSize nbParticlesInitial; + //! The number of particles from the scalfmm decomposition + FSize nbParticlesWorking; + + std::unique_ptr<FSize[]> nbParticlesSentToOthers; + std::unique_ptr<FSize[]> offsetNbParticlesSentToOthers; + std::unique_ptr<FSize[]> nbParticlesRecvFromOthers; + std::unique_ptr<FSize[]> offsetNbParticlesRecvFromOthers; + + std::unique_ptr<FSize[]> mappingToOthers; + + +public: + FPartitionsMapping(const FMpi::FComm& inComm) + : comm(inComm), nbParticlesInitial(0), nbParticlesWorking(0) { + } + + void setComm(const FMpi::FComm& inComm){ + comm = inComm; + } + + template< int NbPhysicalValuesPerPart> + struct TestParticle{ + FPoint<FReal> position; + std::array<FReal, NbPhysicalValuesPerPart> physicalValues; + FSize localIndex; + int initialProcOwner; + + const FPoint<FReal>& getPosition() const { + return position; + } + }; + + template< int NbPhysicalValuesPerPart, class FillerClass> + FVector<TestParticle<NbPhysicalValuesPerPart>> distributeParticles(const FSize inNbParticles, + const FPoint<FReal>& centerOfBox, const FReal boxWidth, + const int TreeHeight, FillerClass filler){ + nbParticlesInitial = inNbParticles; + + //////////////////////////////////////////////////////// + + std::unique_ptr<TestParticle<NbPhysicalValuesPerPart>[]> initialParticles(new TestParticle<NbPhysicalValuesPerPart>[inNbParticles]); + + // Create the array to distribute + for(int idxPart = 0 ; idxPart < nbParticlesInitial ; ++idxPart){ + filler(idxPart, &initialParticles[idxPart].position, &initialParticles[idxPart].physicalValues); + initialParticles[idxPart].localIndex = idxPart; + initialParticles[idxPart].initialProcOwner = comm.processId(); + } + + FVector<TestParticle<NbPhysicalValuesPerPart>> finalParticles; + FLeafBalance balancer; + FMpiTreeBuilder< FReal,TestParticle<NbPhysicalValuesPerPart> >::DistributeArrayToContainer(comm,initialParticles.get(), + nbParticlesInitial, + centerOfBox, + boxWidth, + TreeHeight, + &finalParticles, &balancer); + + FQuickSort<TestParticle<NbPhysicalValuesPerPart>,FSize>::QsOmp(finalParticles.data(), finalParticles.getSize(), + [](const TestParticle<NbPhysicalValuesPerPart>& p1, + const TestParticle<NbPhysicalValuesPerPart>& p2){ + return p1.initialProcOwner < p2.initialProcOwner + || (p1.initialProcOwner == p2.initialProcOwner + && p1.localIndex < p2.localIndex); + }); + + //////////////////////////////////////////////////////// + + nbParticlesWorking = finalParticles.getSize(); + + nbParticlesRecvFromOthers.reset(new FSize[comm.processCount()]); + memset(nbParticlesRecvFromOthers.get(), 0 , sizeof(FSize)*comm.processCount()); + + for(FSize idxPart = 0 ; idxPart < finalParticles.getSize() ; ++idxPart){ + // Count the particles received from each proc + nbParticlesRecvFromOthers[finalParticles[idxPart].initialProcOwner] += 1; + + } + + offsetNbParticlesRecvFromOthers.reset(new FSize[comm.processCount()+1]); + offsetNbParticlesRecvFromOthers[0] = 0; + + for(int idxProc = 0 ; idxProc < comm.processCount() ; ++idxProc){ + offsetNbParticlesRecvFromOthers[idxProc+1] = offsetNbParticlesRecvFromOthers[idxProc] + + nbParticlesRecvFromOthers[idxProc]; + } + + //////////////////////////////////////////////////////// + + std::unique_ptr<FSize[]> nbParticlesRecvFromOthersAllAll(new FSize[comm.processCount()*comm.processCount()]); + // Exchange how many each proc receive from aother + FMpi::MpiAssert( MPI_Allgather( nbParticlesRecvFromOthers.get(), comm.processCount(), FMpi::GetType(FSize()), + nbParticlesRecvFromOthersAllAll.get(), comm.processCount(), + FMpi::GetType(FSize()), comm.getComm()), __LINE__ ); + + //////////////////////////////////////////////////////// + + nbParticlesSentToOthers.reset(new FSize[comm.processCount()]); + FSize checkerSent = 0; + offsetNbParticlesSentToOthers.reset(new FSize[comm.processCount()+1]); + offsetNbParticlesSentToOthers[0] = 0; + + for(int idxProc = 0 ; idxProc < comm.processCount() ; ++idxProc){ + nbParticlesSentToOthers[idxProc] = nbParticlesRecvFromOthersAllAll[comm.processCount()*idxProc + comm.processId()]; + checkerSent += nbParticlesSentToOthers[idxProc]; + offsetNbParticlesSentToOthers[idxProc+1] = offsetNbParticlesSentToOthers[idxProc] + + nbParticlesSentToOthers[idxProc]; + } + // I must have send what I was owning at the beginning + FAssertLF(checkerSent == nbParticlesInitial); + + //////////////////////////////////////////////////////// + + std::unique_ptr<FSize[]> localIdsRecvOrdered(new FSize[nbParticlesWorking]); + + // We list the local id in order of the different proc + for(FSize idxPart = 0 ; idxPart < finalParticles.getSize() ; ++idxPart){ + const int procOwner = finalParticles[idxPart].initialProcOwner; + localIdsRecvOrdered[idxPart] = finalParticles[idxPart].localIndex; + FAssertLF(offsetNbParticlesRecvFromOthers[procOwner] <= idxPart + && idxPart < offsetNbParticlesRecvFromOthers[procOwner+1]); + } + + //////////////////////////////////////////////////////// + + std::unique_ptr<FSize[]> localIdsSendOrdered(new FSize[nbParticlesInitial]); + std::unique_ptr<MPI_Request[]> requests(new MPI_Request[comm.processCount()*2]); + + int iterRequest = 0; + for(int idxProc = 0 ; idxProc < comm.processCount() ; ++idxProc){ + if(idxProc == comm.processId()){ + FAssertLF(nbParticlesRecvFromOthers[idxProc] == nbParticlesSentToOthers[idxProc]); + memcpy(&localIdsSendOrdered[offsetNbParticlesSentToOthers[idxProc]], + &localIdsRecvOrdered[offsetNbParticlesRecvFromOthers[idxProc]], + sizeof(FSize)*nbParticlesRecvFromOthers[idxProc]); + } + else{ + const FSize nbRecvFromProc = nbParticlesRecvFromOthers[idxProc]; + if(nbRecvFromProc){ + FMpi::MpiAssert( MPI_Isend(&localIdsRecvOrdered[offsetNbParticlesRecvFromOthers[idxProc]], + int(nbRecvFromProc), + FMpi::GetType(FSize()), idxProc, + 99, comm.getComm(), &requests[iterRequest++]), __LINE__ ); + } + const FSize nbSendToProc = nbParticlesSentToOthers[idxProc]; + if(nbSendToProc){ + FMpi::MpiAssert( MPI_Irecv(&localIdsSendOrdered[offsetNbParticlesSentToOthers[idxProc]], + int(nbSendToProc), + FMpi::GetType(FSize()), idxProc, + 99, comm.getComm(), &requests[iterRequest++]), __LINE__ ); + } + } + } + + FMpi::MpiAssert( MPI_Waitall( iterRequest, requests.get(), MPI_STATUSES_IGNORE), __LINE__ ); + + //////////////////////////////////////////////////////// + + mappingToOthers.reset(new FSize[nbParticlesInitial]); + for(FSize idxPart = 0; idxPart < nbParticlesInitial ; ++idxPart){ + mappingToOthers[localIdsSendOrdered[idxPart]] = idxPart; + } + + return std::move(finalParticles); + } + + //////////////////////////////////////////////////////// + + // physicalValues must be of size nbParticlesInitial + template< int NbPhysicalValuesPerPart> + std::unique_ptr<std::array<FReal, NbPhysicalValuesPerPart>[]> distributeData( + const std::array<FReal, NbPhysicalValuesPerPart> physicalValues[]){ + std::unique_ptr<std::array<FReal, NbPhysicalValuesPerPart>[]> physicalValuesRorder( + new std::array<FReal, NbPhysicalValuesPerPart>[nbParticlesInitial]); + + for(FSize idxPart = 0; idxPart < nbParticlesInitial ; ++idxPart){ + physicalValuesRorder[mappingToOthers[idxPart]] = physicalValues[idxPart]; + } + + // Allocate the array to store the physical values of my working interval + std::unique_ptr<std::array<FReal, NbPhysicalValuesPerPart>[]> recvPhysicalValues(new std::array<FReal, NbPhysicalValuesPerPart>[nbParticlesWorking]); + + std::unique_ptr<MPI_Request[]> requests(new MPI_Request[comm.processCount()*2]); + int iterRequest = 0; + for(int idxProc = 0 ; idxProc < comm.processCount() ; ++idxProc){ + if(idxProc == comm.processId()){ + FAssertLF(nbParticlesRecvFromOthers[idxProc] == nbParticlesSentToOthers[idxProc]); + memcpy(&recvPhysicalValues[offsetNbParticlesRecvFromOthers[idxProc]], + &physicalValuesRorder[offsetNbParticlesSentToOthers[idxProc]], + sizeof(std::array<FReal, NbPhysicalValuesPerPart>)*nbParticlesRecvFromOthers[idxProc]); + } + else{ + const FSize nbSendToProc = nbParticlesSentToOthers[idxProc]; + if(nbSendToProc){ + FMpi::MpiAssert( MPI_Isend( + const_cast<std::array<FReal, NbPhysicalValuesPerPart>*>(&physicalValuesRorder[offsetNbParticlesSentToOthers[idxProc]]), + int(nbSendToProc*sizeof(std::array<FReal, NbPhysicalValuesPerPart>)), + MPI_BYTE, idxProc, + 2222, comm.getComm(), &requests[iterRequest++]), __LINE__ );; + } + const FSize nbRecvFromProc = nbParticlesRecvFromOthers[idxProc]; + if(nbRecvFromProc){ + FMpi::MpiAssert( MPI_Irecv( + (void*)&recvPhysicalValues[offsetNbParticlesRecvFromOthers[idxProc]], + int(nbRecvFromProc*sizeof(std::array<FReal, NbPhysicalValuesPerPart>)), + MPI_INT, idxProc, + 2222, comm.getComm(), &requests[iterRequest++]), __LINE__ );; + } + } + } + + FMpi::MpiAssert( MPI_Waitall( iterRequest, requests.get(), MPI_STATUSES_IGNORE), __LINE__ ); + + return std::move(recvPhysicalValues); + } + + //////////////////////////////////////////////////////// + + // resValues must be of size nbParticlesWorking + template< int NbResValuesPerPart> + std::unique_ptr<std::array<FReal, NbResValuesPerPart>[]> getResultingData( + const std::array<FReal, NbResValuesPerPart> resValues[]){ + // First allocate the array to store the result + std::unique_ptr<std::array<FReal, NbResValuesPerPart>[]> recvPhysicalValues( + new std::array<FReal, NbResValuesPerPart>[nbParticlesInitial]); + + std::unique_ptr<MPI_Request[]> requests(new MPI_Request[comm.processCount()*2]); + int iterRequest = 0; + for(int idxProc = 0 ; idxProc < comm.processCount() ; ++idxProc){ + if(idxProc == comm.processId()){ + FAssertLF(nbParticlesRecvFromOthers[idxProc] == nbParticlesSentToOthers[idxProc]); + memcpy(&recvPhysicalValues[offsetNbParticlesSentToOthers[idxProc]], + &resValues[offsetNbParticlesRecvFromOthers[idxProc]], + sizeof(std::array<FReal, NbResValuesPerPart>)*nbParticlesRecvFromOthers[idxProc]); + } + else{ + // I originaly receive nbRecvFromProc, so I should + // send nbRecvFromProc back to the real owner + const FSize nbRecvFromProc = nbParticlesRecvFromOthers[idxProc]; + if(nbRecvFromProc){ + FMpi::MpiAssert( MPI_Isend( + const_cast<std::array<FReal, NbResValuesPerPart>*>(&resValues[offsetNbParticlesRecvFromOthers[idxProc]]), + int(nbRecvFromProc*sizeof(std::array<FReal, NbResValuesPerPart>)), + MPI_BYTE, idxProc, + 1111, comm.getComm(), &requests[iterRequest++]), __LINE__ );; + } + // I sent nbSendToProc to idxProc, + // so I should receive nbSendToProc in my interval + const FSize nbSendToProc = nbParticlesSentToOthers[idxProc]; + if(nbSendToProc){ + FMpi::MpiAssert( MPI_Irecv( + &recvPhysicalValues[offsetNbParticlesSentToOthers[idxProc]], + int(nbSendToProc*sizeof(std::array<FReal, NbResValuesPerPart>)), + MPI_BYTE, idxProc, + 1111, comm.getComm(), &requests[iterRequest++]), __LINE__ );; + } + } + } + + FMpi::MpiAssert( MPI_Waitall( iterRequest, requests.get(), MPI_STATUSES_IGNORE), __LINE__ ); + + + std::unique_ptr<std::array<FReal, NbResValuesPerPart>[]> recvPhysicalValuesOrder( + new std::array<FReal, NbResValuesPerPart>[nbParticlesInitial]); + + for(FSize idxPart = 0; idxPart < nbParticlesInitial ; ++idxPart){ + recvPhysicalValuesOrder[idxPart] = recvPhysicalValues[mappingToOthers[idxPart]]; + } + + return std::move(recvPhysicalValuesOrder); + } + + //////////////////////////////////////////////////////// + + FSize getNbParticlesWorking() const{ + return nbParticlesWorking; + } + + FSize getMappingResultToLocal(const FSize inIdx) const{ + return mappingToOthers[inIdx]; + } + +}; + +#endif diff --git a/Src/Containers/FOctree.hpp b/Src/Containers/FOctree.hpp index dba482845213e5db586c5cebc9ac1f59e2acd542..826a7594bc0b8df3d1c8d6c66002aaa1415fedc2 100644 --- a/Src/Containers/FOctree.hpp +++ b/Src/Containers/FOctree.hpp @@ -1686,6 +1686,10 @@ public: * @param function */ void forEachLeaf(std::function<void(LeafClass*)> function){ + if(isEmpty()){ + return; + } + Iterator octreeIterator(this); octreeIterator.gotoBottomLeft(); @@ -1699,6 +1703,10 @@ public: * @param function */ void forEachCell(std::function<void(CellClass*)> function){ + if(isEmpty()){ + return; + } + Iterator octreeIterator(this); octreeIterator.gotoBottomLeft(); @@ -1718,6 +1726,10 @@ public: * @param function */ void forEachCellWithLevel(std::function<void(CellClass*,const int)> function){ + if(isEmpty()){ + return; + } + Iterator octreeIterator(this); octreeIterator.gotoBottomLeft(); @@ -1737,6 +1749,10 @@ public: * @param function */ void forEachCellLeaf(std::function<void(CellClass*,LeafClass*)> function){ + if(isEmpty()){ + return; + } + Iterator octreeIterator(this); octreeIterator.gotoBottomLeft(); diff --git a/Src/Containers/FTreeCoordinate.hpp b/Src/Containers/FTreeCoordinate.hpp index 6b919e6ad7bfe8a6fbeeb4401058154c08d45001..9437a46472652315b1324faaf8ffd478ec7c3137 100644 --- a/Src/Containers/FTreeCoordinate.hpp +++ b/Src/Containers/FTreeCoordinate.hpp @@ -25,15 +25,15 @@ #include "../Components/FAbstractSerializable.hpp" /** -* @author Berenger Bramas (berenger.bramas@inria.fr) -* @class FTreeCoordinate -* Please read the license -* -* This class represents tree coordinate. It is used to save -* the position in "box unit" (not system/space unit!). -* It is directly related to morton index, as interleaves -* bits from this coordinate make the morton index -*/ + * @author Berenger Bramas (berenger.bramas@inria.fr) + * @class FTreeCoordinate + * Please read the license + * + * This class represents tree coordinate. It is used to save + * the position in "box unit" (not system/space unit!). + * It is directly related to morton index, as interleaves + * bits from this coordinate make the morton index + */ class FTreeCoordinate : public FAbstractSerializable, public FPoint<int, 3> { private: using point_t = FPoint<int, 3>; @@ -67,8 +67,13 @@ public: {} /** - * Copy constructor - */ + * \brief Allow build from an FPoint object + */ + using point_t::point_t; + + /** + * Copy constructor + */ FTreeCoordinate(const FTreeCoordinate&) = default; /** Copy and increment constructor @@ -79,10 +84,10 @@ public: {} /** - * Copy assignment - * @param other the source class to copy - * @return this a reference to the current object - */ + * Copy assignment + * @param other the source class to copy + * @return this a reference to the current object + */ FTreeCoordinate& operator=(const FTreeCoordinate& other) = default; [[gnu::deprecated]] @@ -92,11 +97,11 @@ public: /** - * To get the morton index of the current position - * @complexity inLevel - * @param inLevel the level of the component - * @return morton index - */ + * To get the morton index of the current position + * @complexity inLevel + * @param inLevel the level of the component + * @return morton index + */ MortonIndex getMortonIndex() const{ MortonIndex index = 0x0LL; MortonIndex mask = 0x1LL; @@ -152,9 +157,9 @@ public: /** Test equal operator - * @param other the coordinate to compare - * @return true if other & current object have same position - */ + * @param other the coordinate to compare + * @return true if other & current object have same position + */ bool equals(const int inX, const int inY, const int inZ) const { return point_t::data()[0] == inX && point_t::data()[1] == inY diff --git a/Src/Core/FFmmAlgorithmThreadProc.hpp b/Src/Core/FFmmAlgorithmThreadProc.hpp index 34812c09bc31aefc72fc101f1942433ccfb3ad96..86fe46bb186b200166c4abb47c09f99225e34179 100644 --- a/Src/Core/FFmmAlgorithmThreadProc.hpp +++ b/Src/Core/FFmmAlgorithmThreadProc.hpp @@ -72,17 +72,20 @@ private: OctreeClass* const tree; ///< The octree to work on KernelClass** kernels; ///< The kernels - const FMpi::FComm& comm; ///< MPI comm + const FMpi::FComm comm; ///< MPI comm + FMpi::FComm fcomCompute; /// Used to store pointers to cells/leafs to work with - typename OctreeClass::Iterator* iterArray; + typename OctreeClass::Iterator* iterArray; /// Used to store pointers to cells/leafs to send/rcv typename OctreeClass::Iterator* iterArrayComm; int numberOfLeafs; ///< To store the size at the previous level const int MaxThreads; ///< Max number of thread allowed by openmp - const int nbProcess; ///< Process count - const int idProcess; ///< Current process id + const int nbProcessOrig; ///< Process count + const int idProcessOrig; ///< Current process id + int nbProcess; ///< Process count + int idProcess; ///< Current process id const int OctreeHeight; ///< Tree height const int userChunkSize; @@ -149,12 +152,15 @@ public: tree(inTree), kernels(nullptr), comm(inComm), + fcomCompute(inComm), iterArray(nullptr), iterArrayComm(nullptr), numberOfLeafs(0), MaxThreads(FEnv::GetValue("SCALFMM_ALGO_NUM_THREADS",omp_get_max_threads())), - nbProcess(inComm.processCount()), - idProcess(inComm.processId()), + nbProcessOrig(inComm.processCount()), + idProcessOrig(inComm.processId()), + nbProcess(0), + idProcess(0), OctreeHeight(tree->getHeight()), userChunkSize(inUserChunkSize), leafLevelSeparationCriteria(inLeafLevelSeperationCriteria), @@ -164,9 +170,9 @@ public: FAssertLF(leafLevelSeparationCriteria < 3, "Separation criteria should be < 3"); this->kernels = new KernelClass*[MaxThreads]; - #pragma omp parallel num_threads(MaxThreads) +#pragma omp parallel num_threads(MaxThreads) { - #pragma omp critical (InitFFmmAlgorithmThreadProc) +#pragma omp critical (InitFFmmAlgorithmThreadProc) { this->kernels[omp_get_thread_num()] = new KernelClass(*inKernels); } @@ -175,7 +181,7 @@ public: FAbstractAlgorithm::setNbLevelsInTree(tree->getHeight()); FLOG(FLog::Controller << "FFmmAlgorithmThreadProc\n"); - FLOG(FLog::Controller << "Max threads = " << MaxThreads << ", Procs = " << nbProcess << ", I am " << idProcess << ".\n"); + FLOG(FLog::Controller << "Max threads = " << MaxThreads << ", Procs = " << nbProcessOrig << ", I am " << idProcessOrig << ".\n"); FLOG(FLog::Controller << "Chunck Size = " << userChunkSize << "\n"); } @@ -196,136 +202,158 @@ protected: * Call this function to run the complete algorithm */ void executeCore(const unsigned operationsToProceed) override { - // Count leaf + // We are not involve if the tree is empty + const int iHaveParticles = (!tree->isEmpty()); + + std::unique_ptr<int[]> hasParticles(new int[comm.processCount()]); + FMpi::Assert( MPI_Allgather(const_cast<int*>(&iHaveParticles), 1,MPI_INT, + hasParticles.get(), 1, MPI_INT, + comm.getComm()), __LINE__); + + fcomCompute = FMpi::FComm(comm); + fcomCompute.groupReduce(hasParticles.get()); + + if(iHaveParticles){ + + nbProcess = fcomCompute.processCount(); + idProcess = fcomCompute.processId(); + + FLOG(FLog::Controller << "Max threads = " << MaxThreads << ", Procs = " << nbProcess << ", I am " << idProcess << ".\n"); + + // Count leaf #ifdef SCALFMM_TRACE_ALGO - eztrace_resume(); + eztrace_resume(); #endif - this->numberOfLeafs = 0; - { - Interval myFullInterval; - {//Building the interval with the first and last leaves (and count the number of leaves) - typename OctreeClass::Iterator octreeIterator(tree); - octreeIterator.gotoBottomLeft(); - myFullInterval.leftIndex = octreeIterator.getCurrentGlobalIndex(); - do{ - ++this->numberOfLeafs; - } while(octreeIterator.moveRight()); - myFullInterval.rightIndex = octreeIterator.getCurrentGlobalIndex(); - } - // Allocate a number to store the pointer of the cells at a level - iterArray = new typename OctreeClass::Iterator[numberOfLeafs]; - iterArrayComm = new typename OctreeClass::Iterator[numberOfLeafs]; - FAssertLF(iterArray, "iterArray bad alloc"); - FAssertLF(iterArrayComm, "iterArrayComm bad alloc"); - - // We get the leftIndex/rightIndex indexes from each procs - FMpi::MpiAssert( MPI_Allgather( &myFullInterval, sizeof(Interval), MPI_BYTE, intervals, sizeof(Interval), MPI_BYTE, comm.getComm()), __LINE__ ); - - // Build my intervals for all levels - std::unique_ptr<Interval[]> myIntervals(new Interval[OctreeHeight]); - // At leaf level we know it is the full interval - myIntervals[OctreeHeight - 1] = myFullInterval; - - // We can estimate the interval for each level by using the parent/child relation - for(int idxLevel = OctreeHeight - 2 ; idxLevel >= 0 ; --idxLevel){ - myIntervals[idxLevel].leftIndex = myIntervals[idxLevel+1].leftIndex >> 3; - myIntervals[idxLevel].rightIndex = myIntervals[idxLevel+1].rightIndex >> 3; - } + this->numberOfLeafs = 0; + { + Interval myFullInterval; + {//Building the interval with the first and last leaves (and count the number of leaves) + typename OctreeClass::Iterator octreeIterator(tree); + octreeIterator.gotoBottomLeft(); + myFullInterval.leftIndex = octreeIterator.getCurrentGlobalIndex(); + do{ + ++this->numberOfLeafs; + } while(octreeIterator.moveRight()); + myFullInterval.rightIndex = octreeIterator.getCurrentGlobalIndex(); + } + // Allocate a number to store the pointer of the cells at a level + iterArray = new typename OctreeClass::Iterator[numberOfLeafs]; + iterArrayComm = new typename OctreeClass::Iterator[numberOfLeafs]; + FAssertLF(iterArray, "iterArray bad alloc"); + FAssertLF(iterArrayComm, "iterArrayComm bad alloc"); + + // We get the leftIndex/rightIndex indexes from each procs + FMpi::MpiAssert( MPI_Allgather( &myFullInterval, sizeof(Interval), MPI_BYTE, intervals, sizeof(Interval), MPI_BYTE, fcomCompute.getComm()), __LINE__ ); + + // Build my intervals for all levels + std::unique_ptr<Interval[]> myIntervals(new Interval[OctreeHeight]); + // At leaf level we know it is the full interval + myIntervals[OctreeHeight - 1] = myFullInterval; + + // We can estimate the interval for each level by using the parent/child relation + for(int idxLevel = OctreeHeight - 2 ; idxLevel >= 0 ; --idxLevel){ + myIntervals[idxLevel].leftIndex = myIntervals[idxLevel+1].leftIndex >> 3; + myIntervals[idxLevel].rightIndex = myIntervals[idxLevel+1].rightIndex >> 3; + } - // Process 0 uses the estimates as real intervals, but other processes - // should remove cells that belong to others - if(idProcess != 0){ - //We test for each level if process on left (idProcess-1) own cell I thought I owned - typename OctreeClass::Iterator octreeIterator(tree); - octreeIterator.gotoBottomLeft(); - octreeIterator.moveUp(); + // Process 0 uses the estimates as real intervals, but other processes + // should remove cells that belong to others + if(idProcess != 0){ + //We test for each level if process on left (idProcess-1) own cell I thought I owned + typename OctreeClass::Iterator octreeIterator(tree); + octreeIterator.gotoBottomLeft(); + octreeIterator.moveUp(); - // At h-1 the working limit is the parent of the right cell of the proc on the left - MortonIndex workingLimitAtLevel = intervals[idProcess-1].rightIndex >> 3; + // At h-1 the working limit is the parent of the right cell of the proc on the left + MortonIndex workingLimitAtLevel = intervals[idProcess-1].rightIndex >> 3; - // We check if we have no more work to do - int nullIntervalFromLevel = 0; + // We check if we have no more work to do + int nullIntervalFromLevel = 0; - for(int idxLevel = OctreeHeight - 2 ; idxLevel >= 1 && nullIntervalFromLevel == 0 ; --idxLevel){ - while(octreeIterator.getCurrentGlobalIndex() <= workingLimitAtLevel){ - if( !octreeIterator.moveRight() ){ - // We cannot move right we are not owner of any more cell - nullIntervalFromLevel = idxLevel; - break; + for(int idxLevel = OctreeHeight - 2 ; idxLevel >= 1 && nullIntervalFromLevel == 0 ; --idxLevel){ + while(octreeIterator.getCurrentGlobalIndex() <= workingLimitAtLevel){ + if( !octreeIterator.moveRight() ){ + // We cannot move right we are not owner of any more cell + nullIntervalFromLevel = idxLevel; + break; + } + } + // If we are responsible for some cells at this level keep the first index + if(nullIntervalFromLevel == 0){ + myIntervals[idxLevel].leftIndex = octreeIterator.getCurrentGlobalIndex(); + octreeIterator.moveUp(); + workingLimitAtLevel >>= 3; } } - // If we are responsible for some cells at this level keep the first index - if(nullIntervalFromLevel == 0){ - myIntervals[idxLevel].leftIndex = octreeIterator.getCurrentGlobalIndex(); - octreeIterator.moveUp(); - workingLimitAtLevel >>= 3; + // In case we are not responsible for any cells we put the leftIndex = rightIndex+1 + for(int idxLevel = nullIntervalFromLevel ; idxLevel >= 1 ; --idxLevel){ + myIntervals[idxLevel].leftIndex = myIntervals[idxLevel].rightIndex + 1; } } - // In case we are not responsible for any cells we put the leftIndex = rightIndex+1 - for(int idxLevel = nullIntervalFromLevel ; idxLevel >= 1 ; --idxLevel){ - myIntervals[idxLevel].leftIndex = myIntervals[idxLevel].rightIndex + 1; - } - } - // We get the leftIndex/rightIndex indexes from each procs - FMpi::MpiAssert( MPI_Allgather( myIntervals.get(), int(sizeof(Interval)) * OctreeHeight, MPI_BYTE, - workingIntervalsPerLevel, int(sizeof(Interval)) * OctreeHeight, MPI_BYTE, comm.getComm()), __LINE__ ); - } + // We get the leftIndex/rightIndex indexes from each procs + FMpi::MpiAssert( MPI_Allgather( myIntervals.get(), int(sizeof(Interval)) * OctreeHeight, MPI_BYTE, + workingIntervalsPerLevel, int(sizeof(Interval)) * OctreeHeight, MPI_BYTE, fcomCompute.getComm()), __LINE__ ); + } #ifdef SCALFMM_TRACE_ALGO - eztrace_enter_event("P2M", EZTRACE_YELLOW); + eztrace_enter_event("P2M", EZTRACE_YELLOW); #endif - Timers[P2MTimer].tic(); - if(operationsToProceed & FFmmP2M) bottomPass(); - Timers[P2MTimer].tac(); + Timers[P2MTimer].tic(); + if(operationsToProceed & FFmmP2M) bottomPass(); + Timers[P2MTimer].tac(); #ifdef SSCALFMM_TRACE_ALGO - eztrace_leave_event(); - eztrace_enter_event("M2M", EZTRACE_PINK); + eztrace_leave_event(); + eztrace_enter_event("M2M", EZTRACE_PINK); #endif - Timers[M2MTimer].tic(); - if(operationsToProceed & FFmmM2M) upwardPass(); - Timers[M2MTimer].tac(); + Timers[M2MTimer].tic(); + if(operationsToProceed & FFmmM2M) upwardPass(); + Timers[M2MTimer].tac(); #ifdef SCALFMM_TRACE_ALGO - eztrace_leave_event(); - eztrace_enter_event("M2L", EZTRACE_GREEN); + eztrace_leave_event(); + eztrace_enter_event("M2L", EZTRACE_GREEN); #endif - Timers[M2LTimer].tic(); - if(operationsToProceed & FFmmM2L) transferPass(); - Timers[M2LTimer].tac(); + Timers[M2LTimer].tic(); + if(operationsToProceed & FFmmM2L) transferPass(); + Timers[M2LTimer].tac(); - #ifdef SCALFMM_TRACE_ALGO - eztrace_leave_event(); - eztrace_enter_event("L2L", EZTRACE_PINK); +#ifdef SCALFMM_TRACE_ALGO + eztrace_leave_event(); + eztrace_enter_event("L2L", EZTRACE_PINK); #endif - Timers[L2LTimer].tic(); - if(operationsToProceed & FFmmL2L) downardPass(); - Timers[L2LTimer].tac(); + Timers[L2LTimer].tic(); + if(operationsToProceed & FFmmL2L) downardPass(); + Timers[L2LTimer].tac(); #ifdef SCALFMM_TRACE_ALGO - eztrace_leave_event(); - eztrace_enter_event("L2P+P2P", EZTRACE_BLUE); + eztrace_leave_event(); + eztrace_enter_event("L2P+P2P", EZTRACE_BLUE); #endif - Timers[NearTimer].tic(); - if( (operationsToProceed & FFmmP2P) || (operationsToProceed & FFmmL2P) ) directPass((operationsToProceed & FFmmP2P),(operationsToProceed & FFmmL2P)); - Timers[NearTimer].tac(); + Timers[NearTimer].tic(); + if( (operationsToProceed & FFmmP2P) || (operationsToProceed & FFmmL2P) ) directPass((operationsToProceed & FFmmP2P),(operationsToProceed & FFmmL2P)); + Timers[NearTimer].tac(); #ifdef SCALFMM_TRACE_ALGO - eztrace_leave_event(); - eztrace_stop(); + eztrace_leave_event(); + eztrace_stop(); #endif - // delete array - delete [] iterArray; - delete [] iterArrayComm; - iterArray = nullptr; - iterArrayComm = nullptr; + // delete array + delete [] iterArray; + delete [] iterArrayComm; + iterArray = nullptr; + iterArrayComm = nullptr; #ifdef SCALFMM_TRACE_ALGO - eztrace_stop(); + eztrace_stop(); #endif + } + else{ + FLOG( FLog::Controller << "\tProcess = " << comm.processId() << " has zero particles.\n" ); + } } ///////////////////////////////////////////////////////////////////////////// @@ -455,11 +483,11 @@ protected: } FLOG(parallelCounter.tic()); - #pragma omp parallel num_threads(MaxThreads) +#pragma omp parallel num_threads(MaxThreads) { KernelClass* myThreadkernels = (kernels[omp_get_thread_num()]); //This single section post and receive the comms, and then do the M2M associated with it. - #pragma omp single nowait +#pragma omp single nowait { FLOG(singleCounter.tic()); // Master proc never send @@ -491,10 +519,10 @@ protected: // Post the message bufferSize = sendBuffer.getSize(); MPI_Isend(&bufferSize, 1, FMpi::GetType(bufferSize), currentProcIdToSendTo, - FMpi::TagFmmM2MSize + idxLevel, comm.getComm(), &requestsSize[iterMpiRequestsSize++]); + FMpi::TagFmmM2MSize + idxLevel, fcomCompute.getComm(), &requestsSize[iterMpiRequestsSize++]); FAssertLF(sendBuffer.getSize() < std::numeric_limits<int>::max()); MPI_Isend(sendBuffer.data(), int(sendBuffer.getSize()), MPI_BYTE, currentProcIdToSendTo, - FMpi::TagFmmM2M + idxLevel, comm.getComm(), &requests[iterMpiRequests++]); + FMpi::TagFmmM2M + idxLevel, fcomCompute.getComm(), &requests[iterMpiRequests++]); } } @@ -509,7 +537,7 @@ protected: && ( !procHasWorkAtLevel(idxLevel+1, idProcSource) || procCoversMyRightBorderCell(idxLevel, idProcSource) )){ if(procHasWorkAtLevel(idxLevel+1, idProcSource) && procCoversMyRightBorderCell(idxLevel, idProcSource)){ MPI_Irecv(&recvBufferSize[nbProcThatSendToMe], 1, FMpi::GetType(recvBufferSize[nbProcThatSendToMe]), - idProcSource, FMpi::TagFmmM2MSize + idxLevel, comm.getComm(), &requestsSize[iterMpiRequestsSize++]); + idProcSource, FMpi::TagFmmM2MSize + idxLevel, fcomCompute.getComm(), &requestsSize[iterMpiRequestsSize++]); nbProcThatSendToMe += 1; FAssertLF(nbProcThatSendToMe <= 7); } @@ -535,7 +563,7 @@ protected: recvBuffer[nbProcThatSendToMe].cleanAndResize(recvBufferSize[nbProcThatSendToMe]); FAssertLF(recvBufferSize[nbProcThatSendToMe] < std::numeric_limits<int>::max()); MPI_Irecv(recvBuffer[nbProcThatSendToMe].data(), int(recvBufferSize[nbProcThatSendToMe]), MPI_BYTE, - idProcSource, FMpi::TagFmmM2M + idxLevel, comm.getComm(), &requests[iterMpiRequests++]); + idProcSource, FMpi::TagFmmM2M + idxLevel, fcomCompute.getComm(), &requests[iterMpiRequests++]); nbProcThatSendToMe += 1; FAssertLF(nbProcThatSendToMe <= 7); } @@ -557,11 +585,11 @@ protected: memcpy(currentChild, iterArray[totalNbCellsAtLevel - 1].getCurrentChild(), 8 * sizeof(CellClass*)); // Retreive data and merge my child and the child from others + int positionToInsert = 0; for(int idxProc = 0 ; idxProc < nbProcThatSendToMe ; ++idxProc){ unsigned packageFlags = unsigned(recvBuffer[idxProc].getValue<unsigned char>()); int position = 0; - int positionToInsert = 0; while( packageFlags && position < 8){ while(!(packageFlags & 0x1)){ packageFlags >>= 1; @@ -590,7 +618,7 @@ protected: }//End Of Single section // All threads proceed the M2M - #pragma omp for nowait schedule(dynamic, userChunkSize) +#pragma omp for nowait schedule(dynamic, userChunkSize) for( int idxCell = nbCellsToSkip ; idxCell < nbCellsForThreads ; ++idxCell){ myThreadkernels->M2M( iterArray[idxCell].getCurrentCell() , iterArray[idxCell].getCurrentChild(), idxLevel); } @@ -645,9 +673,9 @@ protected: FMpiBufferReader**const recvBuffer = new FMpiBufferReader*[nbProcess * OctreeHeight]; memset(recvBuffer, 0, sizeof(FMpiBufferReader*) * nbProcess * OctreeHeight); - #pragma omp parallel num_threads(MaxThreads) +#pragma omp parallel num_threads(MaxThreads) { - #pragma omp master +#pragma omp master { { FLOG(prepareCounter.tic()); @@ -747,7 +775,7 @@ protected: ////////////////////////////////////////////////////////////////// FLOG(gatherCounter.tic()); - FMpi::MpiAssert( MPI_Allgather( indexToSend, nbProcess * OctreeHeight, MPI_LONG_LONG_INT, globalReceiveMap, nbProcess * OctreeHeight, MPI_LONG_LONG_INT, comm.getComm()), __LINE__ ); + FMpi::MpiAssert( MPI_Allgather( indexToSend, nbProcess * OctreeHeight, MPI_LONG_LONG_INT, globalReceiveMap, nbProcess * OctreeHeight, MPI_LONG_LONG_INT, fcomCompute.getComm()), __LINE__ ); FLOG(gatherCounter.tac()); ////////////////////////////////////////////////////////////////// @@ -780,7 +808,7 @@ protected: FMpi::ISendSplit(sendBuffer[idxLevel * nbProcess + idxProc]->data(), sendBuffer[idxLevel * nbProcess + idxProc]->getSize(), idxProc, - FMpi::TagLast + idxLevel*100, comm, &requests); + FMpi::TagLast + idxLevel*100, fcomCompute, &requests); } const long long int toReceiveFromProcAtLevel = globalReceiveMap[(idxProc * nbProcess * OctreeHeight) + idxLevel * nbProcess + idProcess]; @@ -789,7 +817,7 @@ protected: FMpi::IRecvSplit(recvBuffer[idxLevel * nbProcess + idxProc]->data(), recvBuffer[idxLevel * nbProcess + idxProc]->getCapacity(), idxProc, - FMpi::TagLast + idxLevel*100, comm, &requests); + FMpi::TagLast + idxLevel*100, fcomCompute, &requests); } } } @@ -808,7 +836,7 @@ protected: // Do M2L ////////////////////////////////////////////////////////////////// - #pragma omp single nowait +#pragma omp single nowait { typename OctreeClass::Iterator octreeIterator(tree); octreeIterator.moveDown(); @@ -845,7 +873,7 @@ protected: { const int chunckSize = userChunkSize; for(int idxCell = 0 ; idxCell < numberOfCells ; idxCell += chunckSize){ - #pragma omp task default(none) shared(numberOfCells,idxLevel) firstprivate(idxCell) //+ shared(chunckSize) +#pragma omp task default(none) shared(numberOfCells,idxLevel) firstprivate(idxCell) //+ shared(chunckSize) { KernelClass * const myThreadkernels = kernels[omp_get_thread_num()]; const CellClass* neighbors[342]; @@ -860,15 +888,15 @@ protected: } }//End of task spawning - #pragma omp taskwait +#pragma omp taskwait for(int idxThread = 0 ; idxThread < omp_get_num_threads() ; ++idxThread){ - #pragma omp task default(none) firstprivate(idxThread,idxLevel) +#pragma omp task default(none) firstprivate(idxThread,idxLevel) { kernels[idxThread]->finishedLevelM2L(idxLevel); } } - #pragma omp taskwait +#pragma omp taskwait FLOG(computationCounter.tac()); } @@ -945,7 +973,7 @@ protected: // Compute this cells FLOG(computationCounter.tic()); - #pragma omp parallel num_threads(MaxThreads) +#pragma omp parallel num_threads(MaxThreads) { KernelClass * const myThreadkernels = kernels[omp_get_thread_num()]; MortonIndex neighborsIndex[/*189+26+1*/216]; @@ -953,7 +981,7 @@ protected: const CellClass* neighbors[342]; int neighborPositions[342]; - #pragma omp for schedule(dynamic, userChunkSize) nowait +#pragma omp for schedule(dynamic, userChunkSize) nowait for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){ // compute indexes const int counterNeighbors = iterArray[idxCell].getCurrentGlobalCoordinate().getInteractionNeighbors(idxLevel, neighborsIndex, neighborsPosition, separationCriteria); @@ -1079,10 +1107,10 @@ protected: } } - #pragma omp parallel num_threads(MaxThreads) +#pragma omp parallel num_threads(MaxThreads) { KernelClass* myThreadkernels = (kernels[omp_get_thread_num()]); - #pragma omp single nowait +#pragma omp single nowait { FLOG(prepareCounter.tic()); int iterRequests = 0; @@ -1092,7 +1120,7 @@ protected: // Post the receive if(hasToReceive){ FMpi::MpiAssert( MPI_Irecv( &recvBufferSize, 1, FMpi::GetType(recvBufferSize), idxProcToReceive, - FMpi::TagFmmL2LSize + idxLevel, comm.getComm(), &requestsSize[iterRequestsSize++]), __LINE__ ); + FMpi::TagFmmL2LSize + idxLevel, fcomCompute.getComm(), &requestsSize[iterRequestsSize++]), __LINE__ ); } // We have to be sure that we are not sending if we have no work in the current level @@ -1112,10 +1140,10 @@ protected: } // Post the send message FMpi::MpiAssert( MPI_Isend(&sendBufferSize, 1, FMpi::GetType(sendBufferSize), idxProcSend, - FMpi::TagFmmL2LSize + idxLevel, comm.getComm(), &requestsSize[iterRequestsSize++]), __LINE__); + FMpi::TagFmmL2LSize + idxLevel, fcomCompute.getComm(), &requestsSize[iterRequestsSize++]), __LINE__); FAssertLF(sendBuffer.getSize() < std::numeric_limits<int>::max()); FMpi::MpiAssert( MPI_Isend(sendBuffer.data(), int(sendBuffer.getSize()), MPI_BYTE, idxProcSend, - FMpi::TagFmmL2L + idxLevel, comm.getComm(), &requests[iterRequests++]), __LINE__); + FMpi::TagFmmL2L + idxLevel, fcomCompute.getComm(), &requests[iterRequests++]), __LINE__); // Inc and check the counter nbMessageSent += 1; FAssertLF(nbMessageSent <= 7); @@ -1129,7 +1157,7 @@ protected: // Finalize the communication if(iterRequestsSize){ FLOG(waitCounter.tic()); - FAssertLF(iterRequestsSize < 8); + FAssertLF(iterRequestsSize <= 8); FMpi::MpiAssert(MPI_Waitall( iterRequestsSize, requestsSize, MPI_STATUSES_IGNORE), __LINE__); FLOG(waitCounter.tac()); } @@ -1138,12 +1166,12 @@ protected: recvBuffer.cleanAndResize(recvBufferSize); FAssertLF(recvBuffer.getCapacity() < std::numeric_limits<int>::max()); FMpi::MpiAssert( MPI_Irecv( recvBuffer.data(), int(recvBuffer.getCapacity()), MPI_BYTE, idxProcToReceive, - FMpi::TagFmmL2L + idxLevel, comm.getComm(), &requests[iterRequests++]), __LINE__ ); + FMpi::TagFmmL2L + idxLevel, fcomCompute.getComm(), &requests[iterRequests++]), __LINE__ ); } if(iterRequests){ FLOG(waitCounter.tic()); - FAssertLF(iterRequests < 8); + FAssertLF(iterRequests <= 8); FMpi::MpiAssert(MPI_Waitall( iterRequests, requests, MPI_STATUSES_IGNORE), __LINE__); FLOG(waitCounter.tac()); } @@ -1159,12 +1187,12 @@ protected: FLOG(prepareCounter.tac()); } - #pragma omp single nowait +#pragma omp single nowait { FLOG(computationCounter.tic()); } // Threads are working on all the cell of our working interval at that level - #pragma omp for nowait schedule(dynamic, userChunkSize) +#pragma omp for nowait schedule(dynamic, userChunkSize) for(int idxCell = nbCellsToSkip ; idxCell < totalNbCellsAtLevel ; ++idxCell){ myThreadkernels->L2L( iterArray[idxCell].getCurrentCell() , iterArray[idxCell].getCurrentChild(), idxLevel); } @@ -1310,7 +1338,7 @@ protected: //Share to all processus globalReceiveMap FLOG(gatherCounter.tic()); FMpi::MpiAssert( MPI_Allgather( partsToSend, nbProcess, FMpi::GetType(*partsToSend), - globalReceiveMap, nbProcess, FMpi::GetType(*partsToSend), comm.getComm()), __LINE__ ); + globalReceiveMap, nbProcess, FMpi::GetType(*partsToSend), fcomCompute.getComm()), __LINE__ ); FLOG(gatherCounter.tac()); FMpiBufferReader**const recvBuffer = new FMpiBufferReader*[nbProcess]; @@ -1329,7 +1357,7 @@ protected: recvBuffer[idxProc] = new FMpiBufferReader(globalReceiveMap[idxProc * nbProcess + idProcess]); FMpi::IRecvSplit(recvBuffer[idxProc]->data(), recvBuffer[idxProc]->getCapacity(), - idxProc, FMpi::TagFmmP2P, comm, &requests); + idxProc, FMpi::TagFmmP2P, fcomCompute, &requests); } } @@ -1347,7 +1375,7 @@ protected: FAssertLF(sendBuffer[idxProc]->getSize() == globalReceiveMap[idProcess*nbProcess+idxProc]); FMpi::ISendSplit(sendBuffer[idxProc]->data(), sendBuffer[idxProc]->getSize(), - idxProc, FMpi::TagFmmP2P, comm, &requests); + idxProc, FMpi::TagFmmP2P, fcomCompute, &requests); } } @@ -1380,7 +1408,7 @@ protected: delete recvBuffer[idxProc]; recvBuffer[idxProc] = nullptr; } - } + } for(int idxProc = 0 ; idxProc < nbProcess ; ++idxProc){ delete sendBuffer[idxProc]; @@ -1472,7 +1500,7 @@ protected: // need the current particles and neighbors particles const int counter = tree->getLeafsNeighbors(neighbors, neighborPositions, currentIter.coord, OctreeHeight-1); myThreadkernels->P2P( currentIter.coord,currentIter.targets, - currentIter.sources, neighbors, neighborPositions, counter); + currentIter.sources, neighbors, neighborPositions, counter); } } } @@ -1528,7 +1556,7 @@ protected: } if(counter){ myThreadkernels.P2PRemote( currentIter.cell->getCoordinate(), currentIter.targets, - currentIter.sources, neighbors, neighborPositions, counter); + currentIter.sources, neighbors, neighborPositions, counter); } } diff --git a/Src/Files/FGenerateDistribution.hpp b/Src/Files/FGenerateDistribution.hpp index 33613987fd222d8385c1f1ef0ad50773e56d1b17..82612add58c593187a6941a270c767ccb0bc7449 100644 --- a/Src/Files/FGenerateDistribution.hpp +++ b/Src/Files/FGenerateDistribution.hpp @@ -269,7 +269,7 @@ void unifRandomPlummer(const FSize N, const FReal R, FReal * points) { while(m > rand_max) { m = getRandom<FReal>(); } - FReal r = FMath::Sqrt(1.0/(FMath::pow(m, -2.0/3.0) - 1.0)) / r_max; + FReal r = FMath::Sqrt(1.0/(FMath::pow(m, -2.0/3.0) - 1.0)) / r_max * R; points[j] *= r; points[j+1] *= r; points[j+2] *= r; diff --git a/Src/Files/FMpiTreeBuilder.hpp b/Src/Files/FMpiTreeBuilder.hpp index 95ed92bb00982bc3c6cd1ba978b78290c9aceb6e..fc449ea4038ca5203eb773ae545240b20fb42dcd 100644 --- a/Src/Files/FMpiTreeBuilder.hpp +++ b/Src/Files/FMpiTreeBuilder.hpp @@ -162,7 +162,7 @@ public: // To merge the leaves ////////////////////////////////////////////////////////////////////////// - static void MergeSplitedLeaves(const FMpi::FComm& communicator, IndexedParticle* workingArray, FSize* workingSize, + static void MergeSplitedLeaves(const FMpi::FComm& communicator, IndexedParticle** workingArray, FSize* workingSize, FSize ** leavesOffsetInParticles, ParticleClass** particlesArrayInLeafOrder, FSize* const leavesSize){ const int myRank = communicator.processId(); const int nbProcs = communicator.processCount(); @@ -171,13 +171,13 @@ public: { // Get the information of the leaves leavesInfo.clear(); if((*workingSize)){ - leavesInfo.push({workingArray[0].index, 1, 0}); + leavesInfo.push({(*workingArray)[0].index, 1, 0}); for(FSize idxPart = 1 ; idxPart < (*workingSize) ; ++idxPart){ - if(leavesInfo.data()[leavesInfo.getSize()-1].mindex == workingArray[idxPart].index){ + if(leavesInfo.data()[leavesInfo.getSize()-1].mindex == (*workingArray)[idxPart].index){ leavesInfo.data()[leavesInfo.getSize()-1].nbParts += 1; } else{ - leavesInfo.push({workingArray[idxPart].index, 1, idxPart}); + leavesInfo.push({(*workingArray)[idxPart].index, 1, idxPart}); } } } @@ -209,12 +209,16 @@ public: while(0 < idProcToSendTo && (allProcFirstLeafStates[(idProcToSendTo-1)*2 + 1].mindex == borderLeavesState[0].mindex || allProcFirstLeafStates[(idProcToSendTo-1)*2 + 1].mindex == noDataFlag)){ + FLOG(if(VerboseLog) FLog::Controller << "SCALFMM-DEBUG [" << communicator.processId() << "] idProcToSendTo " + << idProcToSendTo << " allProcFirstLeafStates[(idProcToSendTo-1)*2 + 1].mindex " << + allProcFirstLeafStates[(idProcToSendTo-1)*2 + 1].mindex << " borderLeavesState[0].mindex " << + borderLeavesState[0].mindex << "\n"; FLog::Controller.flush(); ); idProcToSendTo -= 1; } // We found someone if(idProcToSendTo != myRank && allProcFirstLeafStates[(idProcToSendTo)*2 + 1].mindex == borderLeavesState[0].mindex){ // Post and send message for the first leaf - FMpi::ISendSplit(&workingArray[0], borderLeavesState[0].nbParts, idProcToSendTo, + FMpi::ISendSplit(&(*workingArray)[0], borderLeavesState[0].nbParts, idProcToSendTo, FMpi::TagExchangeIndexs, communicator, &requests); FLOG(if(VerboseLog) FLog::Controller << "SCALFMM-DEBUG [" << communicator.processId() << "] send " << borderLeavesState[0].nbParts << " to " << idProcToSendTo << "\n"; FLog::Controller.flush(); ); hasSentFirstLeaf = true; @@ -228,11 +232,18 @@ public: // Count all the particle of our first leaf on other procs FSize totalNbParticlesToRecv = 0; int idProcToRecvFrom = myRank; - while(idProcToRecvFrom+1 < nbProcs && - (borderLeavesState[1].mindex == allProcFirstLeafStates[(idProcToRecvFrom+1)*2].mindex - || allProcFirstLeafStates[(idProcToRecvFrom+1)*2].mindex == noDataFlag)){ - idProcToRecvFrom += 1; - totalNbParticlesToRecv += allProcFirstLeafStates[(idProcToRecvFrom)*2].nbParts; + if(!hasSentFirstLeaf || borderLeavesState[0].mindex != borderLeavesState[1].mindex){ + while(idProcToRecvFrom+1 < nbProcs && + (borderLeavesState[1].mindex == allProcFirstLeafStates[(idProcToRecvFrom+1)*2].mindex + || allProcFirstLeafStates[(idProcToRecvFrom+1)*2].mindex == noDataFlag)){ + FLOG(if(VerboseLog) FLog::Controller << "SCALFMM-DEBUG [" << communicator.processId() << "] idProcToRecvFrom " + << idProcToRecvFrom << " allProcFirstLeafStates[(idProcToRecvFrom+1)*2].mindex " << + allProcFirstLeafStates[(idProcToRecvFrom+1)*2].mindex << " borderLeavesState[1].mindex " << + borderLeavesState[1].mindex << "\n"; FLog::Controller.flush(); ); + + idProcToRecvFrom += 1; + totalNbParticlesToRecv += allProcFirstLeafStates[(idProcToRecvFrom)*2].nbParts; + } } // If there are some if(totalNbParticlesToRecv){ @@ -262,7 +273,7 @@ public: const FSize offsetParticles = borderLeavesState[0].nbParts; // Move all the particles for(FSize idxPart = offsetParticles ; idxPart < (*workingSize) ; ++idxPart){ - workingArray[idxPart - offsetParticles] = workingArray[idxPart]; + (*workingArray)[idxPart - offsetParticles] = (*workingArray)[idxPart]; } // Move all the leaf for(int idxLeaf = 1 ; idxLeaf < leavesInfo.getSize() ; ++idxLeaf){ @@ -276,14 +287,16 @@ public: if(hasExtendLastLeaf){ // Allocate array const FSize finalParticlesNumber = (*workingSize) + receivedParticles.size(); + FLOG(if(VerboseLog) FLog::Controller << "SCALFMM-DEBUG [" << communicator.processId() << "] Create array " + << finalParticlesNumber << " particles\n"; FLog::Controller.flush(); ); IndexedParticle* particlesWithExtension = new IndexedParticle[finalParticlesNumber]; // Copy old data - memcpy(particlesWithExtension, workingArray, (*workingSize)*sizeof(IndexedParticle)); + memcpy(particlesWithExtension, (*workingArray), (*workingSize)*sizeof(IndexedParticle)); // Copy received data memcpy(particlesWithExtension + (*workingSize), receivedParticles.data(), receivedParticles.size()*sizeof(IndexedParticle)); // Move ptr - delete[] workingArray; - workingArray = particlesWithExtension; + delete[] (*workingArray); + (*workingArray) = particlesWithExtension; (*workingSize) = finalParticlesNumber; leavesInfo[leavesInfo.getSize()-1].nbParts += receivedParticles.size(); } @@ -298,7 +311,7 @@ public: //Copy all the particles (*particlesArrayInLeafOrder) = new ParticleClass[(*workingSize)]; for(FSize idxPart = 0 ; idxPart < (*workingSize) ; ++idxPart){ - memcpy(&(*particlesArrayInLeafOrder)[idxPart],&workingArray[idxPart].particle,sizeof(ParticleClass)); + memcpy(&(*particlesArrayInLeafOrder)[idxPart],&(*workingArray)[idxPart].particle,sizeof(ParticleClass)); } // Assign the number of leaf (*leavesSize) = leavesInfo.getSize(); @@ -363,8 +376,9 @@ public: const std::vector<FEqualize::Package> packsToSend = FEqualize::GetPackToSend(myCurrentInter, allObjectives); FAssertLF((currentNbLeaves == 0 && packsToSend.size() == 0) || - (packsToSend.size() && FSize(packsToSend[packsToSend.size()-1].elementTo) == currentNbLeaves)); + (currentNbLeaves != 0 && packsToSend.size() && FSize(packsToSend[packsToSend.size()-1].elementTo) == currentNbLeaves)); + FLOG(if(VerboseLog) FLog::Controller << "SCALFMM-DEBUG [" << communicator.processId() << "] Previous currentNbLeaves (" << currentNbLeaves << ")\n"; FLog::Controller.flush(); ); FLOG(if(VerboseLog) FLog::Controller << "SCALFMM-DEBUG [" << communicator.processId() << "] Get my interval (" << packsToSend.size() << ")\n"; FLog::Controller.flush(); ); FLOG(if(VerboseLog) FLog::Controller << "SCALFMM-DEBUG [" << communicator.processId() << "] Send data\n"; FLog::Controller.flush(); ); @@ -379,6 +393,8 @@ public: const FEqualize::Package& pack = packsToSend[idxPack]; if(idxPack != 0) FAssertLF(packsToSend[idxPack].elementFrom == packsToSend[idxPack-1].elementTo); + FAssertLF(FSize(pack.elementTo) <= FSize(currentNbParts)); + FAssertLF(pack.elementFrom <= pack.elementTo); const long long int nbPartsPerPackToSend = leavesOffsetInParticles[pack.elementTo]-leavesOffsetInParticles[pack.elementFrom]; totalSend += nbPartsPerPackToSend; @@ -388,7 +404,7 @@ public: << " from " << pack.elementFrom << " to " << pack.elementTo << " \n"; FLog::Controller.flush(); ); // Send the size of the data requestsNbParts.emplace_back(); - FMpi::MpiAssert(MPI_Isend(&nbPartsPerPackToSend,1,MPI_LONG_LONG_INT,pack.idProc, + FMpi::MpiAssert(MPI_Isend(const_cast<long long int*>(&nbPartsPerPackToSend),1,MPI_LONG_LONG_INT,pack.idProc, FMpi::TagExchangeIndexs, communicator.getComm(), &requestsNbParts.back()),__LINE__); } @@ -552,7 +568,9 @@ public: // From ParticleClass get array of IndexedParticle sorted GetSortedParticlesFromArray(communicator, originalParticlesArray, originalNbParticles, sortingType, boxCenter, boxWidth, treeHeight, &sortedParticlesArray, &nbParticlesInArray); - FLOG( FLog::Controller << "[" << communicator.processId() << "] Particles Distribution: " << "\t GetSortedParticlesFromArray is over (" << timer.tacAndElapsed() << "s)\n"; FLog::Controller.flush(); ); + FLOG( FLog::Controller << "[" << communicator.processId() << "] Particles Distribution: " + << "\t GetSortedParticlesFromArray is over (" << timer.tacAndElapsed() << "s) " + << nbParticlesInArray << " particles\n"; FLog::Controller.flush(); ); FLOG( timer.tic() ); // for(int idx = 0 ; idx < nbParticlesInArray ; ++idx){ @@ -563,7 +581,7 @@ public: FSize * leavesOffsetInParticles = nullptr; FSize nbLeaves = 0; // Merge the leaves - MergeSplitedLeaves(communicator, sortedParticlesArray, &nbParticlesInArray, &leavesOffsetInParticles, &particlesArrayInLeafOrder, &nbLeaves); + MergeSplitedLeaves(communicator, &sortedParticlesArray, &nbParticlesInArray, &leavesOffsetInParticles, &particlesArrayInLeafOrder, &nbLeaves); delete[] sortedParticlesArray; // for(int idx = 0 ; idx < nbParticlesInArray ; ++idx){ diff --git a/Src/Utils/FMpi.hpp b/Src/Utils/FMpi.hpp index f0654ea8ea223c919ca278774fecc095be29bf9e..c299a0fc6088dc6de6830c41ee35452b1cc5b85f 100644 --- a/Src/Utils/FMpi.hpp +++ b/Src/Utils/FMpi.hpp @@ -107,7 +107,7 @@ public: * This class is used to gather the usual methods related to identifying an * MPI communicator. */ - class FComm : public FNoCopyable { + class FComm { int rank; ///< rank related to the comm int nbProc; ///< nb proc in this group @@ -130,6 +130,26 @@ public: reset(); } + /// Constructor : duplicates the given communicator + FComm(const FComm& inCommunicator ) { + FMpi::Assert( MPI_Comm_dup(inCommunicator.communicator, &communicator), __LINE__ , "comm dup"); + FMpi::Assert( MPI_Comm_group(communicator, &group), __LINE__ , "comm group"); + + reset(); + } + + FComm& operator=(const FComm& inCommunicator ) { + FMpi::Assert( MPI_Comm_free(&communicator), __LINE__ ); + FMpi::Assert( MPI_Group_free(&group), __LINE__ ); + + FMpi::Assert( MPI_Comm_dup(inCommunicator.communicator, &communicator), __LINE__ , "comm dup"); + FMpi::Assert( MPI_Comm_group(communicator, &group), __LINE__ , "comm group"); + + reset(); + + return *this; + } + /// Frees communicator and group virtual ~FComm(){ FMpi::Assert( MPI_Comm_free(&communicator), __LINE__ ); @@ -248,6 +268,35 @@ public: delete[] procsIdArray ; } + /** Change the group, create one groupd where processInGroup[i] != 0 + * and another where processInGroup[i] == 0 + */ + void groupReduce(const int processInGroup[]){ + int * procsIdArray = new int [nbProc]; + int counterNewGroup = 0; + for(int idxProc = 0 ;idxProc < nbProc ; ++idxProc){ + if(processInGroup[rank] && processInGroup[idxProc]){ + procsIdArray[counterNewGroup++] = idxProc; + } + else if(!processInGroup[rank] && !processInGroup[idxProc]){ + procsIdArray[counterNewGroup++] = idxProc; + } + } + + MPI_Group previousGroup = group; + FMpi::Assert( MPI_Group_incl(previousGroup, counterNewGroup , procsIdArray, &group), __LINE__ ); + + MPI_Comm previousComm = communicator; + FMpi::Assert( MPI_Comm_create(previousComm, group, &communicator), __LINE__ ); + + MPI_Comm_free(&previousComm); + MPI_Group_free(&previousGroup); + + reset(); + FAssertLF(nbProc == counterNewGroup); + delete[] procsIdArray ; + } + void barrier() const { FMpi::Assert(MPI_Barrier(getComm()), __LINE__); } diff --git a/Src/Utils/FQuickSortMpi.hpp b/Src/Utils/FQuickSortMpi.hpp index 437d2ede4e2f81538ffb9ecdef515f88be01b197..eba5fdfff4bc1298bf76ef6e7cf1e7c72d354e36 100644 --- a/Src/Utils/FQuickSortMpi.hpp +++ b/Src/Utils/FQuickSortMpi.hpp @@ -93,14 +93,18 @@ class FQuickSortMpi : public FQuickSort< SortType, IndexType> { { // Get the number of elements each proc should recv IndexType totalRemainingElements = totalElements; + IndexType totalAvailableElements = totalElementToProceed; + for(int idxProc = firstProcToRecv; idxProc < lastProcToRecv ; ++idxProc){ const IndexType nbElementsAlreadyOwned = (inFromRightToLeft ? globalElementBalance[idxProc].lowerPart : globalElementBalance[idxProc].greaterPart); const IndexType averageNbElementForRemainingProc = (totalRemainingElements)/(lastProcToRecv-idxProc); totalRemainingElements -= nbElementsAlreadyOwned; FAssertLF(totalRemainingElements >= 0); - if(nbElementsAlreadyOwned < averageNbElementForRemainingProc){ - nbElementsToRecvPerProc[idxProc - firstProcToRecv] = (averageNbElementForRemainingProc - nbElementsAlreadyOwned); + if(nbElementsAlreadyOwned < averageNbElementForRemainingProc && totalAvailableElements){ + nbElementsToRecvPerProc[idxProc - firstProcToRecv] = FMath::Min(totalAvailableElements, + averageNbElementForRemainingProc - nbElementsAlreadyOwned); + totalAvailableElements -= nbElementsToRecvPerProc[idxProc - firstProcToRecv]; totalRemainingElements -= nbElementsToRecvPerProc[idxProc - firstProcToRecv]; } else{ @@ -308,6 +312,10 @@ class FQuickSortMpi : public FQuickSort< SortType, IndexType> { counterValuesInPivot += 1; } } + if(counterValuesInPivot <= 1){ + (*shouldStop) = true; + return globalPivot; + } (*shouldStop) = false; return globalPivot/counterValuesInPivot; } @@ -336,7 +344,7 @@ public: break; } - FLOG(if(VerboseLog) FLog::Controller << "SCALFMM-DEBUG [" << currentComm.processId() << "] globalPivot = " << globalPivot << "\n" ); + FLOG(if(VerboseLog) FLog::Controller << "SCALFMM-DEBUG [" << currentComm.processId() << "] globalPivot = " << globalPivot << " for " << currentComm.processCount() << "\n" ); FLOG(if(VerboseLog) FLog::Controller.flush()); // Split the array in two parts lower equal to pivot and greater than pivot diff --git a/Tests/Utils/testPartitionsMapping.cpp b/Tests/Utils/testPartitionsMapping.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b7f5c1bce67ae1b86b38253a3f94caef428d48a4 --- /dev/null +++ b/Tests/Utils/testPartitionsMapping.cpp @@ -0,0 +1,149 @@ +// =================================================================================== +// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Berenger Bramas +// olivier.coulaud@inria.fr, berenger.bramas@inria.fr +// This software is a computer program whose purpose is to compute the FMM. +// +// This software is governed by the CeCILL-C and LGPL licenses and +// abiding by the rules of distribution of free software. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public and CeCILL-C Licenses for more details. +// "http://www.cecill.info". +// "http://www.gnu.org/licenses". +// =================================================================================== + +// ==== CMAKE ===== +// @FUSE_MPI +// ================ + +#include <iostream> + +#include <cstdio> +#include <cstdlib> + + +#include "../../Src/Kernels/Rotation/FRotationCell.hpp" +#include "../../Src/Kernels/Rotation/FRotationKernel.hpp" + +#include "../../Src/Components/FSimpleLeaf.hpp" +#include "../../Src/Kernels/P2P/FP2PParticleContainerIndexed.hpp" + +#include "../../Src/Utils/FParameters.hpp" +#include "../../Src/Utils/FMemUtils.hpp" +#include "../../Src/BalanceTree/FPartitionsMapping.hpp" + +#include "../../Src/Containers/FOctree.hpp" +#include "../../Src/Containers/FVector.hpp" + +#include "../../Src/Files/FRandomLoader.hpp" +#include "../../Src/Files/FMpiTreeBuilder.hpp" + +#include "../../Src/Core/FFmmAlgorithm.hpp" +#include "../../Src/Core/FFmmAlgorithmThread.hpp" +#include "../../Src/Core/FFmmAlgorithmThreadProc.hpp" + +#include "../../Src/BalanceTree/FLeafBalance.hpp" + +#include "../../Src/Utils/FParameterNames.hpp" + +/** + * This program runs the FMM Algorithm Distributed with the Rotation kernel + */ + +// Simply create particles and try the kernels +int main(int argc, char* argv[]) +{ + FHelpDescribeAndExit(argc, argv, + "Test with MPI the chebyshev FMM and compare it to the direct computation for debugging purpose.", + FParameterDefinitions::NbParticles, FParameterDefinitions::OctreeHeight, + FParameterDefinitions::OctreeSubHeight, FParameterDefinitions::NbThreads); + + typedef double FReal; + + FMpi app(argc,argv); + + const FSize nbParticles = FParameters::getValue(argc,argv, FParameterDefinitions::NbParticles.options, 10000000ULL); + const unsigned int TreeHeight = FParameters::getValue(argc, argv, FParameterDefinitions::OctreeHeight.options, 5); + FTic time; + + std::cout << ">> This executable has to be used to test Proc Rotation Algorithm. \n"; + + // init particles position and physical value + struct TestParticle{ + FPoint<FReal> position; + const FPoint<FReal>& getPosition(){ + return position; + } + }; + + // open particle file + std::cout << "Creating : " << nbParticles << "\n" << std::endl; + FRandomLoader<FReal> loader(nbParticles, 1.0, FPoint<FReal>(0,0,0), app.global().processId()); + + time.tic(); + std::unique_ptr<TestParticle[]> particles(new TestParticle[loader.getNumberOfParticles()]); + for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ + loader.fillParticle(&particles[idxPart].position); + } + + FPartitionsMapping<FReal> map(app.global()); + + FVector<FPartitionsMapping<double>::TestParticle<0> > finalParticles = map.distributeParticles<0>(loader.getNumberOfParticles(), + loader.getCenterOfBox(), + loader.getBoxWidth(), TreeHeight, + [&](const int idx, FPoint<FReal>* position, std::array<FReal, 0>* /*val*/){ + position->setPosition(particles[idx].position.getX(), + particles[idx].position.getY(), + particles[idx].position.getZ()); + }); + + // Test every particles exist + { + std::unique_ptr<int[]> count(new int[app.global().processCount() * nbParticles]); + memset(count.get(), 0, sizeof(int) * app.global().processCount() * nbParticles); + for(FSize part = 0 ; part < finalParticles.getSize() ; ++part){ + const FSize idx = finalParticles[part].initialProcOwner*nbParticles + finalParticles[part].localIndex; + FAssertLF(count[idx] == 0) + count[idx] = 1; + } + + FMpi::Assert( MPI_Allreduce(MPI_IN_PLACE, count.get(), app.global().processCount()*nbParticles, MPI_INT, MPI_SUM, + app.global().getComm()), __LINE__); + + for(FSize part = 0 ; part < app.global().processCount()*nbParticles ; ++part){ + FAssertLF(count[part] == 1); + } + } + + // Test to send data + { + std::unique_ptr<std::array<FReal, 2>[]> toDistr(new std::array<FReal, 2>[nbParticles]); + for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ + toDistr[idxPart][0] = app.global().processId(); + toDistr[idxPart][1] = FReal(idxPart); + } + + std::unique_ptr<std::array<FReal, 2>[]> res = map.distributeData<2>(toDistr.get()); + + for(FSize part = 0 ; part < finalParticles.getSize() ; ++part){ + if(int(res[part][0]) != finalParticles[part].initialProcOwner) + std::cout << "[" << app.global().processId() << "]Res proc is " << res[part][0] << " should be " << finalParticles[part].initialProcOwner << std::endl; + if(int(res[part][1]) != finalParticles[part].localIndex) + std::cout << "[" << app.global().processId() << "]Res localidx is " << res[part][1] << " should be " << finalParticles[part].localIndex << std::endl; + } + + std::unique_ptr<std::array<FReal, 2>[]> resBack = map.getResultingData<2>(res.get()); + + for(FSize part = 0 ; part < loader.getNumberOfParticles() ; ++part){ + if(int(resBack[part][0]) != app.global().processId()) + std::cout << "[" << app.global().processId() << "]ResBack proc is " << resBack[part][0] << " should be " << app.global().processId() << std::endl; + if(int(resBack[part][1]) != part) + std::cout << "[" << app.global().processId() << "]ResBack localidx is " << resBack[part][1] << " should be " << part << std::endl; + } + } + + return 0; +} +