diff --git a/.guix b/.guix new file mode 120000 index 0000000000000000000000000000000000000000..546e858b945fcbce5abd6d70b1132e17a9dc8e15 --- /dev/null +++ b/.guix @@ -0,0 +1 @@ +guix-tools/ \ No newline at end of file diff --git a/CMakePresets.json b/CMakePresets.json index 4c642c2dd1ae088a27b652fdf2935861c9dc43c4..f9c010ce1671e583f68e98e12ca34d8ef85b1d74 100644 --- a/CMakePresets.json +++ b/CMakePresets.json @@ -15,7 +15,8 @@ "cacheVariables": { "CMAKE_BUILD_TYPE": "Release", "CMAKE_CXX_FLAGS": "-O3 -march=native", - "scalfmm_BUILD_UNITS": true + "scalfmm_BUILD_UNITS": true, + "scalfmm_USE_MPI": true } }, { @@ -73,11 +74,11 @@ }, { "name": "sequential", - "hidden": true, + "hidden": true, "inherits": "base", "filter": { "exclude": { - "name": "_omp$" + "name": "_omp$|_mpi$" } } }, @@ -91,6 +92,16 @@ } } }, + { + "name": "mpi", + "hidden": true, + "inherits": "base", + "filter": { + "include": { + "name": "_mpi$" + } + } + }, { "name": "test-default", "inherits": "base", @@ -132,6 +143,20 @@ "displayName": "Run OpenMP tests (MKL)", "description": "Run only the OpenMP tests with the MKL", "configurePreset": "mkl" + }, + { + "name": "test-default-mpi", + "inherits": "mpi", + "displayName": "Run MPI tests (OpenBLAS)", + "description": "Run only the MPI tests with OpenBLAS", + "configurePreset": "default" + }, + { + "name": "test-mkl-mpi", + "inherits": "mpi", + "displayName": "Run MPI tests (MKL)", + "description": "Run only the MPI tests with the MKL", + "configurePreset": "mkl" } ] } diff --git a/README.md b/README.md index 2b6283641677d570fc9f429c5534f1f37b4a4992..df3e5d7ca8ded55aaaa079eb25e2151328bd50a3 100644 --- a/README.md +++ b/README.md @@ -110,7 +110,7 @@ and to compile ```bash cd /path/to/build/ # Use cmake, with relevant options -cmake -DCMAKE_CXX_FLAGS= `-Xclang -fopenmp` -S ../ +cmake -DCMAKE_CXX_FLAGS=`-Xclang -fopenmp` -S ../ ``` #### Optimization @@ -275,6 +275,14 @@ guix time-machine -C .guix/scalfmm-channels.scm -- shell -C -m .guix/scalfmm-man We provide several manifest files: `scalfmm-manifest-openblas.scm`, `scalfmm-manifest-mkl.scm`. 
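The preset changes above turn on `scalfmm_USE_MPI` in the base cache variables and add `test-default-mpi`/`test-mkl-mpi` presets filtered on `_mpi$`. A minimal sketch of how they can be driven, assuming the new entries are CTest test presets and using a placeholder build directory in place of the preset's `binaryDir`:

```bash
# Configure with the OpenBLAS configure preset (scalfmm_USE_MPI is now ON).
cmake --preset default
# Build; "build" is a placeholder for the binary directory declared by the preset.
cmake --build build
# Run only the tests matched by the new "_mpi$" filter.
ctest --preset test-default-mpi
```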
+### Build the documentation with guix + +TODO: provide instructions to build the documentation using `guix time-machine` and `guix shell` + +``` bash +guix time-machine -C .guix/scalfmm-channels.scm -- shell -C -m .guix/scalfmm-manifest-doc.scm -L .guix/ +``` + ## Contributing and development guidelines ### Gitlab flow diff --git a/checks/CMakeLists.txt b/checks/CMakeLists.txt index 80a382d34a103749319c1225ad46f2c8f1445bf9..85a49ab41189e8bb61db9fdff4c56fa26edc4263 100644 --- a/checks/CMakeLists.txt +++ b/checks/CMakeLists.txt @@ -52,10 +52,7 @@ endif() if(${CMAKE_PROJECT_NAME}_USE_MPI) list(APPEND source_check_files test_build_let.cpp - - # check_mpi.cpp count_particles_mpi.cpp - # test_compose.cpp ) # message(WARNING "source_check_files ") diff --git a/checks/check_1d.cpp b/checks/check_1d.cpp index d4dc4a8c58254c575e52b393a12adece890ec86f..ff1438e0bb9e439f488be7129548d888c27bd0fd 100644 --- a/checks/check_1d.cpp +++ b/checks/check_1d.cpp @@ -57,9 +57,9 @@ namespace local_args { struct matrix_kernel { - cpp_tools::cl_parser::str_vec flags = {"--kernel", "--k"}; + cpp_tools::cl_parser::str_vec flags = {"--kernel", "-k"}; std::string description = "Matrix kernels: \n 0) 1/r, 2) 1/r^2, " - "2) shift(ln r)-> grad 3) val_grad( 1/r)"; + "2) val_grad( 1/r)"; using type = int; type def = 0; }; @@ -244,6 +244,9 @@ auto run(const int& tree_height, const int& group_size, const std::size_t order, auto total_height = tree_height; interpolator_type interpolator(mk_far, order, total_height, box_width); + // auto memory = interpolator.memory_usage(); + // std::cout << "memory " << memory << std::endl; + typename FMM_OPERATOR_TYPE::far_field_type far_field(interpolator); // near_matrix_kernel_type mk_near{}; @@ -325,58 +328,28 @@ auto run_general(const int& tree_height, const int& group_size, const std::size_ return run<Dimension, value_type, fmm_operators_type>(tree_height, group_size, order, input_file, output_file, check, displayCells, displayParticles); } + else if(kernel == 2) + { // val_grad_one_over_r + using far_matrix_kernel_type = scalfmm::matrix_kernels::laplace::val_grad_one_over_r<1>; + using near_matrix_kernel_type = scalfmm::matrix_kernels::laplace::val_grad_one_over_r<1>; + using near_field_type = scalfmm::operators::near_field_operator<near_matrix_kernel_type>; + // + using interpolation_type = interpolator_alias<double, Dimension, far_matrix_kernel_type>; + + // using interpolation_type = + // scalfmm::interpolation::uniform_interpolator<value_type, Dimension, far_matrix_kernel_type>; + using far_field_type = scalfmm::operators::far_field_operator<interpolation_type, false>; + + using fmm_operators_type = scalfmm::operators::fmm_operators<near_field_type, far_field_type>; + + return run<Dimension, value_type, fmm_operators_type>(tree_height, group_size, order, input_file, output_file, + check, displayCells, displayParticles); + } else { - return 0; + std::cerr << "Unknown kernel value: " << kernel << std::endl; + std::exit(EXIT_FAILURE); } - // else if (kernel == 2) - // { // shift_ln_r - // using far_matrix_kernel_type = - // scalfmm::matrix_kernels::laplace::ln_2d; - // using near_matrix_kernel_type = - // scalfmm::matrix_kernels::laplace::grad_ln_2d; - // using near_field_type = scalfmm::operators::near_field_operator< - // near_matrix_kernel_type>; - // // - // using interpolation_type = - // scalfmm::interpolation::uniform_interpolator< - // value_type, Dimension, far_matrix_kernel_type>; - // using far_field_type = - // scalfmm::operators::far_field_operator<interpolation_type, - // true>; - - //
using fmm_operators_type = - // scalfmm::operators::fmm_operators<near_field_type, - // far_field_type>; - - // return run<Dimension, value_type, fmm_operators_type>( - // tree_height, group_size, order, input_file, output_file, check, - // displayCells, displayParticles); - // } - // else if (kernel == 3) - // { // val_grad_one_over_r - // using far_matrix_kernel_type = - // scalfmm::matrix_kernels::laplace::val_grad_one_over_r<2>; - // using near_matrix_kernel_type = - // scalfmm::matrix_kernels::laplace::val_grad_one_over_r<2>; - // using near_field_type = scalfmm::operators::near_field_operator< - // near_matrix_kernel_type>; - // // - // using interpolation_type = - // scalfmm::interpolation::uniform_interpolator< - // value_type, Dimension, far_matrix_kernel_type>; - // using far_field_type = - // scalfmm::operators::far_field_operator<interpolation_type, - // false>; - - // using fmm_operators_type = - // scalfmm::operators::fmm_operators<near_field_type, - // far_field_type>; - - // return run<Dimension, value_type, fmm_operators_type>( - // tree_height, group_size, order, input_file, output_file, check, - // displayCells, displayParticles); - return 0; } // scalfmm::matrix_kernels::laplace::one_over_r; @@ -426,24 +399,4 @@ auto main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) -> int auto ret = run_general<dimension, value_type>(tree_height, group_size, order, kernel, input_file, output_file, check, displayCells, displayParticles); return ret; -} - -/* -faux 3d - -0 p_tree [0.254212, 0.574017, 0] p_ref [0.254212, 0.574017, 0] -(8108.38 18.5173) 0 p_tree [0.926114, 0.470606, 0] p_ref [0.926114, 0.470606, -0](6937.27 7.06436 ) 0 p_tree [0.725386, 0.777877, 0] p_ref [0.725386, -0.777877,0] (4583.97 15.7301 ) 0 p_tree [0.411987, 0.622132, 0] p_ref -[0.411987,0.622132, 0] (9935.72 16.7617 ) - -2d - -0 p_tree [0.307883, 0.668131] p_ref [0.307883, 0.668131] (5414.29 13.8412 ) -0 p_tree [0.173692, 0.734691] p_ref [0.173692, 0.734691] (4656.34 20.3212 ) -0 p_tree [0.254212, 0.574017] p_ref [0.254212, 0.574017] (8108.38 18.5173 ) -0 p_tree [0.926114, 0.470606] p_ref [0.926114, 0.470606] (6937.27 7.06436 ) -0 p_tree [0.725386, 0.777877] p_ref [0.725386, 0.777877] (4583.97 15.7301 ) -0 p_tree [0.411987, 0.622132] p_ref [0.411987, 0.622132] (9935.72 16.7617 ) - -*/ +} \ No newline at end of file diff --git a/checks/check_2d.cpp b/checks/check_2d.cpp index 667fc174491f8a35f91dcfa5dbe46dcb50f9cce4..7ca992d3f2d476eb2ccb00b824f4c5ce1cd5e2ac 100644 --- a/checks/check_2d.cpp +++ b/checks/check_2d.cpp @@ -251,13 +251,17 @@ auto run(const int& tree_height, const int& group_size, const std::size_t order, typename FMM_OPERATOR_TYPE::far_field_type far_field(interpolator); // + auto memory = interpolator.memory_usage(); + std::cout << "memory " << memory << std::endl; + // near_matrix_kernel_type mk_near{}; - const bool mutual_near = true; + const bool mutual_near = false; typename FMM_OPERATOR_TYPE::near_field_type near_field(mk_near, mutual_near); // + std::cout << cpp_tools::colors::blue << "Fmm with kernels: " << std::endl - << " near " << mk_near.name() << std::endl + << " near " << mk_near.name() << " mutual " << std::boolalpha << near_field.mutual() << std::endl << " far " << mk_far.name() << std::endl << cpp_tools::colors::reset; @@ -267,8 +271,11 @@ auto run(const int& tree_height, const int& group_size, const std::size_t order, int const& separation_criterion = fmm_operator.near_field().separation_criterion(); bool const& mutual = fmm_operator.near_field().mutual(); 
scalfmm::list::sequential::build_interaction_lists(tree, tree, separation_criterion, mutual); + // scalfmm::io::trace(std::cout, tree, 4); auto operator_to_proceed = scalfmm::algorithms::all; + // auto operator_to_proceed = scalfmm::algorithms::operators_to_proceed::farfield; + // auto operator_to_proceed = scalfmm::algorithms::nearfield; // auto operator_to_proceed = scalfmm::algorithms::p2m | scalfmm::algorithms::m2m | scalfmm::algorithms::m2l; std::cout << cpp_tools::colors::blue << "operator_to_proceed: "; @@ -301,22 +308,22 @@ auto run(const int& tree_height, const int& group_size, const std::size_t order, } //////////////////////////////////////////////////////////////////////////// std::cout << "Save Tree\n"; - std::string outName("saveTree.bin"); - std::string header("Uniform FFT "); + std::string outName("tree_check2d_h" + std::to_string(tree_height) + ".bin"); + std::string header("chebyshev - Low-rank "); scalfmm::tools::io::save(outName, tree, header); - // - std::cout << "Read Tree\n"; - value_type eps{1.e-8}; - auto tree1 = scalfmm::tools::io::read<group_tree_type>(outName); - - if(scalfmm::utils::compare_two_trees(tree, tree1, eps, 3)) - { - std::cout << "Same trees !\n"; - } - else - { - std::cout << "Trees are different!\n"; - } + // // + // std::cout << "Read Tree\n"; + // value_type eps{1.e-8}; + // auto tree1 = scalfmm::tools::io::read<group_tree_type>(outName); + + // if(scalfmm::utils::compare_two_trees(tree, tree1, eps, 3)) + // { + // std::cout << "Same trees !\n"; + // } + // else + // { + // std::cout << "Trees are different!\n"; + // } return 1; } @@ -330,7 +337,7 @@ auto run_general(const int& tree_height, const int& group_size, const std::size_ // using far_matrix_kernel_type = // scalfmm::matrix_kernels::others::one_over_r2; using // near_matrix_kernel_type = scalfmm::matrix_kernels::others::one_over_r2; - // using options = scalfmm::options::uniform_<scalfmm::options::fft_>; + // using options = scalfmm::options::uniform_<scalfmm::options::fft_>; using options = scalfmm::options::chebyshev_<scalfmm::options::low_rank_>; // using options = scalfmm::options::chebyshev_<scalfmm::options::dense_>; if(kernel == 0) diff --git a/checks/count_particles_gen.hpp b/checks/count_particles_gen.hpp index 82be3ea72fb6d11858970f6d3ed7b3a6d2998a09..4abf785a57c2583bc224d3624fc886d128997d4d 100644 --- a/checks/count_particles_gen.hpp +++ b/checks/count_particles_gen.hpp @@ -218,18 +218,20 @@ auto run(const int& tree_height, const int& group_size, Array const& pbc, const // // fmm_operator_type fmm_operator{}; using fmm_operator_type = scalfmm::operators::fmm_operators<count_kernels::particles::count_near_field, count_kernels::particles::count_far_field<Dimension>>; - bool mutual = false; + bool mutual = true; count_kernels::particles::count_near_field nf(mutual); count_kernels::particles::count_far_field<Dimension> ff{}; fmm_operator_type fmm_operator(nf, ff); - // auto operator_to_proceed = scalfmm::algorithms::all; - auto operator_to_proceed = scalfmm::algorithms::farfield; + auto operator_to_proceed = scalfmm::algorithms::all; + // auto operator_to_proceed = scalfmm::algorithms::farfield; auto separation_criterion = fmm_operator.near_field().separation_criterion(); scalfmm::list::sequential::build_interaction_lists(tree, tree, separation_criterion, mutual); // scalfmm::io::trace(std::cout, tree, 4); - + std::cout << cpp_tools::colors::blue << "operator_to_proceed: "; + scalfmm::algorithms::print(operator_to_proceed); + std::cout << cpp_tools::colors::reset << std::endl; 
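As a usage note for the sequential/OpenMP drivers built on this header, the command lines below mirror the examples documented in `count_particles_mpi.cpp` further down; binary names follow the `count_particles_{omp,seq}` pattern used in that comment and paths are left to the build layout:

```bash
# Count the particles read from an input file.
count_particles_seq --input-file ../data/unitCube_100_PF.fma --tree-height 3 -gs 2 --dimension 3
# Or generate one particle per leaf centre; nb particles = nb leaves = 2^(dimension*(tree_height-1)).
count_particles_omp --tree-height 3 -gs 2 --dimension 2
```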
#ifdef COUNT_USE_OPENMP scalfmm::algorithms::fmm[scalfmm::options::_s(scalfmm::options::omp)](tree, fmm_operator, operator_to_proceed); #else @@ -273,9 +275,9 @@ auto run(const int& tree_height, const int& group_size, Array const& pbc, const std::cout << "wrong number of particles - nb particles (min) " << nb_particles_min << " (max) " << nb_particles_max << " (expected) " << number_of_particles << std::endl; -#ifdef SCALFMM_COUNT_KERNEL_SAVE_TREE +#ifndef SCALFMM_COUNT_KERNEL_SAVE_TREE std::cout << "Save the Tree \n"; - std::string outName("saveTreeSeq.bin"); + std::string outName("tree_count_gen.bin"); std::string header("count kernel seq "); scalfmm::tools::io::save(outName, tree, header); #endif diff --git a/checks/count_particles_mpi.cpp b/checks/count_particles_mpi.cpp index 0c719bd717fe562deefd55a690ea66918313e49f..4e3edfe4d0321981b71dc575de5935dc1d77953f 100644 --- a/checks/count_particles_mpi.cpp +++ b/checks/count_particles_mpi.cpp @@ -7,6 +7,7 @@ #include "scalfmm/operators/count_kernel/count_kernel.hpp" // #include "scalfmm/interpolation/grid_storage.hpp" +#include "scalfmm/meta/const_functions.hpp" #include "scalfmm/meta/utils.hpp" #include "scalfmm/tools/fma_dist_loader.hpp" #include "scalfmm/tools/tree_io.hpp" @@ -54,15 +55,11 @@ /// \endcode /// /// Examples -/// * we count the number of particles from the input file -/// \code -/// count_particles_{omp,seq} --input-file ../data/unitCube_100_PF.fma --tree-height 3 -gs 2 --dimension 3 -/// \endcode /// /// * Here we generate one particle per leaf located at the center, /// the number of particles = number of leaf = std::pow(2, dimension*(tree_height - 1)) /// \code -/// count_particles_{omp,seq} --tree-height 3 -gs 2 --dimension 2 +/// count_particles_mpi --tree-height 3 -gs 2 --dimension 2 --dist-part-leaf /// \endcode /** @@ -145,7 +142,7 @@ auto run(cpp_tools::parallel_manager::parallel_manager& para, const int& tree_he const bool interaction, bool use_leaf_distribution, bool use_particle_distribution) -> int { static constexpr std::size_t number_of_physical_values = 1; - static constexpr std::size_t dimpow2 = pow(2, Dimension); + static constexpr std::size_t dimpow2{scalfmm::meta::pow(2, Dimension)}; const auto runtime_order = 1; int level_shared{2}; @@ -278,13 +275,15 @@ auto run(cpp_tools::parallel_manager::parallel_manager& para, const int& tree_he // separation criteria used to construct M2L | P2P ghosts int separation = 1; // Construct the LET + std::cout << "\n build let \n" << std::flush; + auto letTree = scalfmm::tree::let::buildLetTree<group_tree_type>( para, number_of_particles, particles_set, box, leaf_level, level_shared, group_size, group_size, runtime_order, separation, use_leaf_distribution, use_particle_distribution); // if(para.io_master()) { - std::cout << cpp_tools::colors::blue << "Print tree distribution\n"; + std::cout << cpp_tools::colors::blue << "Print tree distribution\n" << std::flush; letTree.print_distrib(std::cout); std::cout << "\n trace 2\n" << std::flush; @@ -308,22 +307,28 @@ auto run(cpp_tools::parallel_manager::parallel_manager& para, const int& tree_he count_kernels::particles::count_near_field nf(mutual); count_kernels::particles::count_far_field<Dimension> ff{}; fmm_operator_type fmm_operator(nf, ff); - std::cout << cpp_tools::colors::red << "build_interaction_lists \n" << cpp_tools::colors::reset << std::flush; + std::cout << cpp_tools::colors::blue << "Fmm with kernels: " << nf.matrix_kernel().name() << " mutual " + << std::boolalpha << cpp_tools::colors::reset; 
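The usage comment rewritten above keeps only the one-particle-per-leaf mode for the MPI driver; a hedged launch line, combining it with the `mpirun` options documented in `examples/test_mpi_algo.cpp` (the binary path is hypothetical):

```bash
# 3 MPI processes, one particle per leaf, leaf and particle distributions enabled.
mpirun --oversubscribe -np 3 ./checks/Release/count_particles_mpi \
    --tree-height 3 -gs 2 --dimension 2 --dist-part-leaf
```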
scalfmm::list::sequential::build_interaction_lists(letTree, letTree, separation_criterion, mutual); - std::cout << cpp_tools::colors::red << "trace \n" << cpp_tools::colors::reset << std::flush; - // if(para.io_master()) - { - std::cout << cpp_tools::colors::red << "trace 4\n" << cpp_tools::colors::reset << std::flush; - - scalfmm::io::trace(std::cout, letTree, 4); - } - - // auto operator_to_proceed = scalfmm::algorithms::operators_to_proceed::farfield; - // auto operator_to_proceed = scalfmm::algorithms::operators_to_proceed::nearfield; + // std::cout << cpp_tools::colors::red << "trace \n" << cpp_tools::colors::reset << std::flush; + // // if(para.io_master()) + // { + // std::cout << cpp_tools::colors::red << "trace 4\n" << cpp_tools::colors::reset << std::flush; + + // scalfmm::io::trace(std::cout, letTree, 4); + // std::cout << "\n &&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&& \n" << std::flush; + + // std::cout << std::flush; + // } + //| scalfmm::algorithms::operators_to_proceed::m2l + // auto operator_to_proceed = + // scalfmm::algorithms::operators_to_proceed::p2p | scalfmm::algorithms::operators_to_proceed::p2m | + // scalfmm::algorithms::operators_to_proceed::m2m | scalfmm::algorithms::operators_to_proceed::m2l | + // scalfmm::algorithms::operators_to_proceed::l2l | scalfmm::algorithms::operators_to_proceed::l2p; + // auto operator_to_proceed = scalfmm::algorithms::operators_to_proceed::farfield; + // auto operator_to_proceed = scalfmm::algorithms::operators_to_proceed::nearfield; auto operator_to_proceed = scalfmm::algorithms::operators_to_proceed::all; - // auto operator_to_proceed = (scalfmm::algorithms::operators_to_proceed::p2m | scalfmm::algorithms::operators_to_proceed::m2l); - // auto operator_to_proceed = (scalfmm::algorithms::operators_to_proceed::p2m | scalfmm::algorithms::operators_to_proceed::m2m | scalfmm::algorithms::operators_to_proceed::m2l) ; scalfmm::algorithms::mpi::proc_task(letTree, fmm_operator, operator_to_proceed); // @@ -356,23 +361,52 @@ auto run(cpp_tools::parallel_manager::parallel_manager& para, const int& tree_he } }); std::cout << cpp_tools::colors::reset << '\n'; - if(right_number_of_particles) - { - std::cout << "Found the right number of particles - nb particles " << number_of_particles << std::endl; - } - else + + // if(!right_number_of_particles) + // { + // std::cout << "wrong number of particles - nb particles (min) " << nb_particles_min << " (max) " + // << nb_particles_max << " (expected) " << number_of_particles << std::endl; + + // // if(para.io_master()) + // // std::cout << "Save Tree in parallel\n"; + // // // std::string outName("saveTree_" + std::to_string(rank) + ".bin"); + // // std::string outName("tree_count_mpi.bin"); + // // std::string header("CHEBYSHEV LOW RANK "); + // // scalfmm::tools::io::save(para, outName, letTree, header); + // } + // else + // { + // std::cout << "Found the right number of particles - nb particles " << number_of_particles << std::endl; + // } + + // for(int level = letTree.height() - 1; level >= 2; --level) + // { + // std::cout << "\n -- level " << level << " -- " << std::endl; + // scalfmm::component::for_each_mine_leaf(letTree.begin_mine_cells(level), letTree.end_mine_cells(level), + // [](auto const& cell) + // { + // std::cout << "cell index " << cell.index() << " multipoles " + // << cell.multipoles().at(0) << " locals " + // << cell.locals().at(0) << std::endl; + // }); + // } + int res{int(right_number_of_particles)}; + + para.get_communicator().reduce(&res, 1, MPI_INT, 
MPI_LAND, 0); + + if(para.io_master()) { - std::cout << "wrong number of particles - nb particles (min) " << nb_particles_min << " (max) " - << nb_particles_max << " (expected) " << number_of_particles << std::endl; - - if(para.io_master()) - std::cout << "Save Tree in parallel\n"; - // std::string outName("saveTree_" + std::to_string(rank) + ".bin"); - std::string outName("saveTreeLet.bin"); - std::string header("CHEBYSHEV LOW RANK "); - scalfmm::tools::io::save(para, outName, letTree, header); + if(bool(res)) + { + std::cout << cpp_tools::colors::blue << "Right number of particles " << std::boolalpha << bool(res) + << cpp_tools::colors::reset << std::endl; + } + else + { + std::cout << cpp_tools::colors::red << "Wrong number of particles " << std::boolalpha << bool(res) + << cpp_tools::colors::reset << std::endl; + } } - return 0; } @@ -409,19 +443,21 @@ auto main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) -> int use_particle_distribution = false; } + int dimension = parser.get<local_args::dimension>(); + // OpenMP + const std::size_t nb_threads{parser.get<args::thread_count>()}; + omp_set_dynamic(0); + omp_set_num_threads(nb_threads); if(para.io_master()) { - std::cout << cpp_tools::colors::blue << "<params> Tree height : " << tree_height << cpp_tools::colors::reset + std::cout << cpp_tools::colors::blue << "<params> Tree height: " << tree_height << cpp_tools::colors::reset << '\n'; - std::cout << cpp_tools::colors::blue << "<params> Group Size : " << group_size << cpp_tools::colors::reset + std::cout << cpp_tools::colors::blue << "<params> Group Size: " << group_size << cpp_tools::colors::reset + << '\n'; + std::cout << cpp_tools::colors::blue << "<params> Threads num: " << nb_threads << cpp_tools::colors::reset << '\n'; } - int dimension = parser.get<local_args::dimension>(); - // OpenMP - const std::size_t nb_threads{parser.get<args::thread_count>()}; - omp_set_dynamic(0); - omp_set_num_threads(nb_threads); // const bool readFile(parser.exists<local_args::read_file>()); std::string input_file; @@ -459,20 +495,19 @@ auto main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) -> int << '\n'; } } - /* + // if(dimension == 1) { run<1>(para, tree_height, group_size, pbc, nb_level_above_root, readFile, input_file, interaction, use_leaf_distribution, use_particle_distribution); } else if(dimension == 2) - */ { run<2>(para, tree_height, group_size, pbc, nb_level_above_root, readFile, input_file, interaction, use_leaf_distribution, use_particle_distribution); } - /* - else if(dimension == 3) + + else if(dimension == 3) { run<3>(para, tree_height, group_size, pbc, nb_level_above_root, readFile, input_file, interaction, use_leaf_distribution, use_particle_distribution); @@ -482,9 +517,9 @@ auto main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) -> int run<4>(para, tree_height, group_size, pbc, nb_level_above_root, readFile, input_file, interaction, use_leaf_distribution, use_particle_distribution); } - */ + // std::cout << std::flush; para.get_communicator().barrier(); para.end(); -} +} \ No newline at end of file diff --git a/checks/test_build_let.cpp b/checks/test_build_let.cpp index 6d05c7e9da9671546864d3078fc5c1fcf6e95e55..5a4e68e62a730915c68b7dd876a9b653e30e7a6d 100644 --- a/checks/test_build_let.cpp +++ b/checks/test_build_let.cpp @@ -57,18 +57,18 @@ namespace local_args cpp_tools::cl_parser::str_vec flags = {"--dist-part"}; std::string description = "Use the particle distribution to distribute the tree"; }; - struct PartLeafDistrib - { - /// Unused 
type, mandatory per interface specification - using type = bool; - /// The parameter is a flag, it doesn't expect a following value - enum - { - flagged - }; - cpp_tools::cl_parser::str_vec flags = {"--dist-part-leaf"}; - std::string description = "Use two distribution one for the particle and one for the tree"; - }; + // struct PartLeafDistrib + // { + // /// Unused type, mandatory per interface specification + // using type = bool; + // /// The parameter is a flag, it doesn't expect a following value + // enum + // { + // flagged + // }; + // cpp_tools::cl_parser::str_vec flags = {"--dist-part-leaf"}; + // std::string description = "Use two distribution one for the particle and one for the tree"; + // }; } // namespace local_args template<int dimension> auto run(cpp_tools::parallel_manager::parallel_manager& para, const std::string& input_file, @@ -172,13 +172,32 @@ auto run(cpp_tools::parallel_manager::parallel_manager& para, const std::string& // bool const mutual = false; // fmm_operator.near_field().mutual(); // scalfmm::list::sequential::build_interaction_lists(letGroupTree, letGroupTree, separation_criterion, mutual); - // scalfmm::utils::trace(std::cout, letGroupTree, 1); + scalfmm::io::trace(std::cout, letGroupTree, 2); - /// - /////////////////////////////////////////////////////////////////////////////////////////////////////// - /// - /////////////////////////////////////////////////////////////////////////////////////////////////////// - /// Save the data +/// +/////////////////////////////////////////////////////////////////////////////////////////////////////// +/// +/////////////////////////////////////////////////////////////////////////////////////////////////////// +/// Save the data +#ifdef SCALFMM_DEBUG_MPI + { + const int rank = para.get_process_id(); + std::string outName1("tree_rank_" + std::to_string(rank) + ".bin"); + std::string header1("LOCAL TREE "); + scalfmm::tools::io::save(outName1, letGroupTree, header1); + const int nbDataPerRecord = scalfmm::container::particle_traits<particle_type>::number_of_elements; + const int inputs_size = scalfmm::container::particle_traits<particle_type>::inputs_size; + const bool verbose_write = false; // True only for the master + std::string outName2("particles_rank_" + std::to_string(rank) + ".fma"); + + scalfmm::io::FFmaGenericWriter<value_type> writer_seq(outName2, verbose_write); + // Get the number of particles + auto number_of_particles = letGroupTree.number_particles(); + std::clog << "rank[" + std::to_string(rank) + "] number_of_particles " << number_of_particles << std::endl; + /// + writer_seq.writeDataFromTree(letGroupTree, number_of_particles); + } +#endif // const int nbDataPerRecord = scalfmm::container::particle_traits<particle_type>::number_of_elements; // const int inputs_size = scalfmm::container::particle_traits<particle_type>::inputs_size; @@ -215,7 +234,7 @@ auto main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) -> int auto parser = cpp_tools::cl_parser::make_parser( cpp_tools::cl_parser::help{}, args::input_file(), args::output_file(), args::tree_height{}, args::order{}, args::thread_count{}, args::block_size{}, args::Dimension{}, local_args::PartDistrib{}, - local_args::PartLeafDistrib{}, local_args::LevelShared{}); + /*local_args::PartLeafDistrib{},*/ local_args::LevelShared{}); parser.parse(argc, argv); // Getting command line parameters const int tree_height{parser.get<args::tree_height>()}; @@ -229,11 +248,11 @@ auto main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) -> int 
bool use_particle_distribution{parser.exists<local_args::PartDistrib>()}; bool use_leaf_distribution{!use_particle_distribution}; - if(parser.exists<local_args::PartLeafDistrib>()) - { - use_leaf_distribution = true; - use_particle_distribution = true; - } + // if(parser.exists<local_args::PartLeafDistrib>()) + // { + // use_leaf_distribution = true; + // use_particle_distribution = true; + // } if(para.io_master()) { diff --git a/checks/test_build_tree.cpp b/checks/test_build_tree.cpp index cf97d76a903548b0ba3fc7285ba11995ecc351c8..29cf5218d7afbb452dbf42df5a47fefb75f4832d 100644 --- a/checks/test_build_tree.cpp +++ b/checks/test_build_tree.cpp @@ -1,4 +1,6 @@ -#include <array> +// @FUSE_OMP + +#include <array> #include <chrono> #include <thread> @@ -6,12 +8,12 @@ #include "scalfmm/interpolation/interpolation.hpp" #include "scalfmm/lists/sequential.hpp" #include "scalfmm/matrix_kernels/laplace.hpp" -#include "scalfmm/tools/fma_dist_loader.hpp" +// #include "scalfmm/tools/fma_dist_loader.hpp" #include "scalfmm/tools/fma_loader.hpp" #include "scalfmm/tools/tree_io.hpp" #include "scalfmm/tree/box.hpp" #include "scalfmm/tree/cell.hpp" -// #include "scalfmm/tree/group_let.hpp" +// #include "scalfmm/tree/group_tree_view.hpp" #include "scalfmm/tree/io.hpp" #include "scalfmm/tree/leaf_view.hpp" @@ -20,7 +22,7 @@ #include <cpp_tools/cl_parser/tcli.hpp> #include <cpp_tools/colors/colorized.hpp> -#include <cpp_tools/parallel_manager/parallel_manager.hpp> +// #include <cpp_tools/parallel_manager/parallel_manager.hpp> /// /// \brief main /// \param argv @@ -55,9 +57,8 @@ namespace local_args } // namespace local_args template<int dimension> -auto run(cpp_tools::parallel_manager::parallel_manager& para, const std::string& input_file, - const std::string& output_file, const int tree_height, const int& part_group_size, const int& leaf_group_size, - const int order) -> int +auto run(const std::string& input_file, const std::string& output_file, const int tree_height, + const int& part_group_size, const int& leaf_group_size, const int order) -> int { constexpr int nb_inputs_near = 1; constexpr int nb_outputs_near = 1; @@ -87,8 +88,8 @@ auto run(cpp_tools::parallel_manager::parallel_manager& para, const std::string& /// /// 1) read constants of the problem in file; /// 2) each processor read N/P particles - - scalfmm::io::DistFmaGenericLoader<value_type, dimension> loader(input_file, para, para.io_master()); + bool verbose = true; + scalfmm::io::FFmaGenericLoader<value_type, dimension> loader(input_file, verbose); // const int local_number_of_particles = loader.getMyNumberOfParticles(); value_type width = loader.getBoxWidth(); @@ -194,10 +195,10 @@ auto run(cpp_tools::parallel_manager::parallel_manager& para, const std::string& auto main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) -> int { - cpp_tools::parallel_manager::parallel_manager para; - para.init(); - std::cout << std::boolalpha << "para.io_master() " << para.io_master() << " get_process_id() " - << para.get_process_id() << std::endl; + // cpp_tools::parallel_manager::parallel_manager para; + // para.init(); + // std::cout << std::boolalpha << "para.io_master() " << para.io_master() << " get_process_id() " + // << para.get_process_id() << std::endl; // // Parameter handling auto parser = cpp_tools::cl_parser::make_parser(cpp_tools::cl_parser::help{}, args::input_file(), @@ -214,7 +215,7 @@ auto main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) -> int const auto order{parser.get<args::order>()}; const auto 
dimension{parser.get<args::Dimension>()}; - if(para.io_master()) + // if(para.io_master()) { std::cout << cpp_tools::colors::blue << "<params> Tree height: " << tree_height << cpp_tools::colors::reset << '\n'; @@ -235,14 +236,14 @@ auto main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) -> int { constexpr int dim = 2; - run<dim>(para, input_file, output_file, tree_height, group_size, group_size, order); + run<dim>(input_file, output_file, tree_height, group_size, group_size, order); break; } // case 3: // { // constexpr int dim = 3; - // run<dim>(para, input_file, output_file, tree_height, group_size, group_size, order); + // run<dim>( input_file, output_file, tree_height, group_size, group_size, order); // break; // } default: @@ -251,5 +252,5 @@ auto main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) -> int } } - para.end(); + // para.end(); } diff --git a/cmake/dependencies/openmp.cmake b/cmake/dependencies/openmp.cmake index e73fa3d3b94ee55414feb7ca9dff1cfbc71f23de..5d4a7d7ae14698605bb5905bc460081146c3c78c 100644 --- a/cmake/dependencies/openmp.cmake +++ b/cmake/dependencies/openmp.cmake @@ -1,21 +1,36 @@ # # OpenMP # ------ +if(CMAKE_CXX_COMPILER_ID MATCHES "AppleClang") + # https://cliutils.gitlab.io/modern-cmake/chapters/packages/OpenMP.html + # build the target + find_file(OMP_H omp.h ENV HOMEBREW_CELLAR PATH_SUFFIXES "libomp/18.1.6/include") + cmake_print_variables(OMP_H) + add_library(OpenMP::OpenMP_CXX IMPORTED INTERFACE) + set_property(TARGET OpenMP::OpenMP_CXX + PROPERTY INTERFACE_COMPILE_OPTIONS "-Xclang -fopenmp") + set_property(TARGET OpenMP::OpenMP_CXX + PROPERTY INTERFACE_INCLUDE_DIRECTORIES "/usr/local/Cellar/libomp/18.1.6/include") + set_property(TARGET OpenMP::OpenMP_CXX + PROPERTY INTERFACE_COMPILE_DEFINITIONS "_OPENMP") + set_property(TARGET OpenMP::OpenMP_CXX + PROPERTY INTERFACE_LINK_DIRECTORIES "/usr/local/Cellar/libomp/18.1.6/lib") -find_package(OpenMP REQUIRED) + # Only works if the same flag is passed to the linker; use CMake 3.9+ otherwise (Intel, AppleClang) + set_property(TARGET OpenMP::OpenMP_CXX + PROPERTY INTERFACE_LINK_LIBRARIES -lomp) + set(OpenMP_CXX_FOUND ON) +else() + find_package(OpenMP REQUIRED) +endif() if(OpenMP_CXX_FOUND) list(APPEND OMP_TARGET OpenMP::OpenMP_CXX cpp_tools::parallel_manager) list(APPEND OMP_COMPILE_DEFINITIONS CPP_TOOLS_PARALLEL_MANAGER_USE_OMP) list(APPEND FUSE_LIST OMP) - # cmake_print_variables(CMAKE_CXX_COMPILER_ID) - # cmake_print_variables(CMAKE_CXX_COMPILER_VERSION) - # cmake_print_variables(CMAKE_CXX_COMPILER_VERSION_INTERNAL) -# if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") -# if(CMAKE_CXX_COMPILER_VERSION VERSION_GREATER "16.0") -# set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lomp") -# endif() -# endif() + else(OpenMP_CXX_FOUND) - message(WARNING "OPENMP NOT FOUND") + message(WARNING " OPENMP NOT FOUND ") endif(OpenMP_CXX_FOUND) + +cmake_print_variables(FUSE_LIST) \ No newline at end of file diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index a4291fe51e8e9c4265ef5fef970054fb8c62e74c..9cdc274efdd326d031375d84479a58df7a0e431e 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -3,32 +3,27 @@ # List of source files set(source_tests_files + tutorial.cpp + # test test_particles.cpp test_dimension.cpp test_dimension_low_rank.cpp test_dimension_omp.cpp + fmm_source_target.cpp + # FMM test_laplace_kernels.cpp test_like_mrhs.cpp - # # debug & check - # count_particles_seq.cpp - # count_particles_st_seq.cpp - # count_particles_omp.cpp - # count_particles_st_omp.cpp
- + # debug & check # test_time_loop.cpp - # test to move in compose/sandox project - fmm_source_target.cpp - tutorial.cpp - - # Test accuracy (barycentric interpolation) - test_accuracy.cpp ) +cmake_print_variables({CMAKE_PROJECT_NAME}_USE_MPI) if(${CMAKE_PROJECT_NAME}_USE_MPI) + list(APPEND source_tests_files test_mpi_algo.cpp ) diff --git a/examples/test_mpi_algo.cpp b/examples/test_mpi_algo.cpp index c37c4e1239c9f1284d3dd1f5208af16be0e35291..9501fb2cfb564246f50a2fd58b18d19a4875c82c 100644 --- a/examples/test_mpi_algo.cpp +++ b/examples/test_mpi_algo.cpp @@ -4,10 +4,9 @@ #include "scalfmm/container/particle.hpp" #include "scalfmm/interpolation/interpolation.hpp" -#include "scalfmm/lists/sequential.hpp" +#include "scalfmm/lists/lists.hpp" #include "scalfmm/matrix_kernels/laplace.hpp" #include "scalfmm/tools/fma_dist_loader.hpp" -#include "scalfmm/tools/fma_loader.hpp" #include "scalfmm/tools/tree_io.hpp" #include "scalfmm/tree/box.hpp" #include "scalfmm/tree/cell.hpp" @@ -31,7 +30,7 @@ /// \code /// mpirun -output-filename log --oversubscribe -np 3 ./examples/Release/test_mpi_algo /// --input-file ../data/debug/circle2d_r3.fma --order 3 --tree-height 3 -/// --group-size 3 -d 2 +/// --group-size 3 -d 2 /// \endcode namespace local_args { @@ -45,7 +44,7 @@ namespace local_args }; struct PartDistrib { - /// Unused type, m|atory per interface specification + /// Unused type, matory per interface specification using type = bool; /// The parameter is a flag, it doesn't expect a following value enum @@ -55,18 +54,18 @@ namespace local_args cpp_tools::cl_parser::str_vec flags = {"--dist-part"}; std::string description = "Use the particle distribution to distribute the tree"; }; - struct PartLeafDistrib - { - /// Unused type, m|atory per interface specification - using type = bool; - /// The parameter is a flag, it doesn't expect a following value - enum - { - flagged - }; - cpp_tools::cl_parser::str_vec flags = {"--dist-part-leaf"}; - std::string description = "Use two distribution one for the particle | one for the tree"; - }; + // struct PartLeafDistrib + // { + // /// Unused type, m|atory per interface specification + // using type = bool; + // /// The parameter is a flag, it doesn't expect a following value + // enum + // { + // flagged + // }; + // cpp_tools::cl_parser::str_vec flags = {"--dist-part-leaf"}; + // std::string description = "Use two distribution one for the particle | one for the tree"; + // }; } // namespace local_args using value_type = double; @@ -100,7 +99,6 @@ auto run(cpp_tools::parallel_manager::parallel_manager& para, const std::string& using leaf_type = scalfmm::component::leaf_view<particle_type>; using box_type = scalfmm::component::box<position_type>; using group_tree_type = scalfmm::component::dist_group_tree<cell_type, leaf_type, box_type>; - /// /////////////////////////////////////////////////////////////////////////////////////////////////////// /// Read the data in parallel @@ -139,6 +137,18 @@ auto run(cpp_tools::parallel_manager::parallel_manager& para, const std::string& particles_set[idx] = p; // std::cout << p << std::endl; } + /////////////////// + far_matrix_kernel_type mk_far{}; + const bool mutual_near = false; + + near_matrix_kernel_type mk_near{}; + interpolator_type interpolator(mk_far, order, static_cast<std::size_t>(tree_height), box.width(0)); + typename FMM_OPERATOR_TYPE::near_field_type near_field(mk_near, mutual_near); + typename FMM_OPERATOR_TYPE::far_field_type far_field(interpolator); + FMM_OPERATOR_TYPE fmm_operator(near_field, 
far_field); + // Build interaction lists + int const& separation_criterion = fmm_operator.near_field().separation_criterion(); + bool const& mutual = fmm_operator.near_field().mutual(); /// /////////////////////////////////////////////////////////////////////////////////////////////////////// // check @@ -152,11 +162,10 @@ auto run(cpp_tools::parallel_manager::parallel_manager& para, const std::string& /// 2) construct the tree, then the let /// // separation criteria used to construct M2L | P2P ghosts - int separation = 1; // Construct the LET auto letGroupTree = scalfmm::tree::let::buildLetTree<group_tree_type>( para, number_of_particles, particles_set, box, leaf_level, level_shared, part_group_size, leaf_group_size, order, - separation, use_leaf_distribution, use_particle_distribution); + separation_criterion, use_leaf_distribution, use_particle_distribution); if(para.io_master()) { @@ -165,58 +174,95 @@ auto run(cpp_tools::parallel_manager::parallel_manager& para, const std::string& std::cout << cpp_tools::colors::reset; } - // - // Build interaction lists - int const& separation_criterion = separation; // fmm_operator.near_field().separation_criterion(); - bool const mutual = false; // fmm_operator.near_field().mutual(); scalfmm::list::sequential::build_interaction_lists(letGroupTree, letGroupTree, separation_criterion, mutual); - scalfmm::io::trace(std::cout, letGroupTree, 2); +#ifdef SCALFMM_DEBUG_MPI + { + const int rank = para.get_process_id(); + std::string outName0("tree_group_rank_" + std::to_string(rank) + ".txt"); + std::ofstream out(outName0); + scalfmm::io::trace(out, letGroupTree, 2); + std::string outName1("tree_rank_" + std::to_string(rank) + ".bin"); + std::string header1("LOCAL TREE "); + scalfmm::tools::io::save(outName1, letGroupTree, header1); + // + const int nbDataPerRecord = scalfmm::container::particle_traits<particle_type>::number_of_elements; + const int inputs_size = scalfmm::container::particle_traits<particle_type>::inputs_size; + const bool verbose_write = true; // True only for the master + std::string outName2("particles_rank_" + std::to_string(rank) + ".fma"); - far_matrix_kernel_type mk_far{}; - interpolator_type interpolator(mk_far, order, static_cast<std::size_t>(tree_height), box.width(0)); - near_matrix_kernel_type mk_near{}; - typename FMM_OPERATOR_TYPE::near_field_type near_field(mk_near); - typename FMM_OPERATOR_TYPE::far_field_type far_field(interpolator); - FMM_OPERATOR_TYPE fmm_operator(near_field, far_field); + scalfmm::io::FFmaGenericWriter<value_type> writer_seq(outName2, verbose_write); + // Get the number of particles + auto number_of_particles = letGroupTree.number_particles(); + std::clog << "number_of_particles " << number_of_particles << std::endl; + /// + writer_seq.writeDataFromTree(letGroupTree, number_of_particles); + } + +#endif + if(para.io_master()) + { + std::cout << cpp_tools::colors::blue << "Fmm with kernels: " << std::endl + << " near " << mk_near.name() << std::endl + << " far " << mk_far.name() << std::endl + << cpp_tools::colors::reset; + } // - auto operator_to_proceed = scalfmm::algorithms::operators_to_proceed::farfield; - // auto operator_to_proceed = - // scalfmm::algorithms::operators_to_proceed::p2m | scalfmm::algorithms::operators_to_proceed::m2m | - // scalfmm::algorithms::operators_to_proceed::m2l | scalfmm::algorithms::operators_to_proceed::l2l | + auto operator_to_proceed = scalfmm::algorithms::operators_to_proceed::all; + // auto operator_to_proceed = scalfmm::algorithms::operators_to_proceed::farfield; + 
// auto operator_to_proceed = scalfmm::algorithms::operators_to_proceed::nearfield; + // auto operator_to_proceed = scalfmm::algorithms::operators_to_proceed::m2l; + // auto operator_to_proceed = scalfmm::algorithms::operators_to_proceed::p2m | + // scalfmm::algorithms::operators_to_proceed::m2m | + // scalfmm::algorithms::operators_to_proceed::m2l; + // | scalfmm::algorithms::operators_to_proceed::p2p | // scalfmm::algorithms::operators_to_proceed::l2p; + // // scalfmm::algorithms::fmm[scalfmm::options::_s(scalfmm::options::omp)](tree, fmm_operator, operator_to_proceed); - std::cout << " call algo scalfmm::algorithms::mpi::proc_task\n "; + std::cout << cpp_tools::colors::blue << "operator_to_proceed: "; + scalfmm::algorithms::print(operator_to_proceed); + std::cout << cpp_tools::colors::reset << std::endl; + scalfmm::algorithms::mpi::proc_task(letGroupTree, fmm_operator, operator_to_proceed); - std::cout << " end algo scalfmm::algorithms::mpi::proc_task\n "; + // std::clog << "End scalfmm::algorithms::mpi::proc_task\n"; /// /////////////////////////////////////////////////////////////////////////////////////////////////////// /// /////////////////////////////////////////////////////////////////////////////////////////////////////// /// Save the data - // const int nbDataPerRecord = scalfmm::container::particle_traits<particle_type>::number_of_elements; - // const int inputs_size = scalfmm::container::particle_traits<particle_type>::inputs_size; + if(!output_file.empty()) + { + const int nbDataPerRecord = scalfmm::container::particle_traits<particle_type>::number_of_elements; + const int inputs_size = scalfmm::container::particle_traits<particle_type>::inputs_size; + const bool verbose_write = para.io_master(); // True only for the master + scalfmm::io::DistFmaGenericWriter<value_type> writer(output_file, para, verbose_write); + /// Get the number of particles + // std::cout << "number_of_particles " << number_of_particles << std::endl; + /// + writer.writeHeader(centre, width, number_of_particles, sizeof(value_type), nbDataPerRecord, dimension, + inputs_size); + /// + writer.writeFromTree(letGroupTree, number_of_particles); + // std::cout << "End writing\n" << std::flush; + } - // // static constexpr std::size_t nbDataPerRecord = particle_type::number_of_elements; - // scalfmm::tools::DistFmaGenericWriter<value_type> writer(output_file, para); - // /// Get the number of particles - // std::cout << "number_of_particles " << number_of_particles << std::endl; - // /// - // writer.writeHeader(centre, width, number_of_particles, sizeof(value_type), nbDataPerRecord, dimension, - // inputs_size); - // /// - // writer.writeFromTree(letGroupTree, number_of_particles); // /// - // /////////////////////////////////////////////////////////////////////////////////////////////////////// - if(para.io_master()) - std::cout << "Save Tree in parallel\n"; - // std::string outName("saveTree_" + std::to_string(rank) + ".bin"); - std::string outName("saveTreeLet.bin"); - std::string header("CHEBYSHEV LOW RANK "); - scalfmm::tools::io::save(para, outName, letGroupTree, header); + // /////////////////////////////////////////////////////////////////////////////////////////////////////** + // #ifdef SCALFMM_DEBUG_MPI_1 + // if(para.io_master()) + // std::cout << "Save Tree in parallel\n"; + // // // std::string outName("saveTree_" + std::to_string(rank) + ".bin"); + // std::string outName("tree_let.bin"); + // std::string header("CHEBYSHEV LOW RANK "); + // scalfmm::tools::io::save(para, outName, letGroupTree, 
header); + // const int rank = para.get_process_id(); + // std::string outName1("tree_rank_" + std::to_string(rank) + ".bin"); + // std::string header1("CHEBYSHEV LOW RANK "); + // scalfmm::tools::io::save(outName1, letGroupTree, header1); + // #endif return 0; } @@ -231,7 +277,7 @@ auto main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) -> int auto parser = cpp_tools::cl_parser::make_parser( cpp_tools::cl_parser::help{}, args::input_file(), args::output_file(), args::tree_height{}, args::order{}, args::thread_count{}, args::block_size{}, args::Dimension{}, local_args::PartDistrib{}, - local_args::PartLeafDistrib{}, local_args::LevelShared{}); + /*local_args::PartLeafDistrib{},*/ local_args::LevelShared{}); parser.parse(argc, argv); // Getting comm| line parameters const int tree_height{parser.get<args::tree_height>()}; @@ -242,14 +288,15 @@ auto main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) -> int const auto output_file{parser.get<args::output_file>()}; const auto order{parser.get<args::order>()}; const auto dimension{parser.get<args::Dimension>()}; + const std::size_t nb_threads{parser.get<args::thread_count>()}; bool use_particle_distribution{parser.exists<local_args::PartDistrib>()}; bool use_leaf_distribution{!use_particle_distribution}; - if(parser.exists<local_args::PartLeafDistrib>()) - { - use_leaf_distribution = true; - use_particle_distribution = true; - } + // if(parser.exists<local_args::PartLeafDistrib>()) + // { + // use_leaf_distribution = true; + // use_particle_distribution = false; + // } if(para.io_master()) { @@ -258,13 +305,21 @@ auto main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) -> int std::cout << cpp_tools::colors::blue << "<params> Group Size: " << group_size << cpp_tools::colors::reset << '\n'; std::cout << cpp_tools::colors::blue << "<params> order: " << order << cpp_tools::colors::reset << '\n'; + std::cout << cpp_tools::colors::blue << "<params> Proc num: " << para.get_num_processes() + << cpp_tools::colors::reset << '\n'; + std::cout << cpp_tools::colors::blue << "<params> Threads num: " << nb_threads << cpp_tools::colors::reset + << '\n'; if(!input_file.empty()) { std::cout << cpp_tools::colors::blue << "<params> Input file: " << input_file << cpp_tools::colors::reset << '\n'; } - std::cout << cpp_tools::colors::blue << "<params> Output file: " << output_file << cpp_tools::colors::reset - << '\n'; + if(!output_file.empty()) + { + std::cout << cpp_tools::colors::blue << "<params> Output file: " << output_file << cpp_tools::colors::reset + << '\n'; + } + std::cout << cpp_tools::colors::blue << "<params> Particle Distribution: " << std::boolalpha << use_particle_distribution << cpp_tools::colors::reset << '\n'; std::cout << cpp_tools::colors::blue << "<params> Leaf Distribution: " << std::boolalpha @@ -292,26 +347,28 @@ auto main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) -> int use_leaf_distribution, use_particle_distribution); break; } - // case 3: - // { - // constexpr int dim = 3; - // using interpolation_type = interpolator_alias<value_type, dim, matrix_kernel_type, options_chebyshev>; - // using far_field_type = scalfmm::operators::far_field_operator<interpolation_type>; + case 3: + { + constexpr int dim = 3; + using interpolation_type = interpolator_alias<value_type, dim, matrix_kernel_type, options_chebyshev>; + using far_field_type = scalfmm::operators::far_field_operator<interpolation_type>; - // using ffm_type = scalfmm::operators::fmm_operators<near_field_type, far_field_type>; + using 
ffm_type = scalfmm::operators::fmm_operators<near_field_type, far_field_type>; - // run<dim, ffm_type>(para, input_file, output_file, tree_height, group_size, group_size, order, level_shared, - // use_leaf_distribution, use_particle_distribution); - // break; - // } + run<dim, ffm_type>(para, input_file, output_file, tree_height, group_size, group_size, order, level_shared, + use_leaf_distribution, use_particle_distribution); + break; + } default: { std::cerr << "Dimension should be only 2 or 3 !!\n"; } } #ifdef SCALFMM_USE_MPI - std::cout << std::flush; + std::cout << " barrier() \n" << std::flush; para.get_communicator().barrier(); #endif + std::cout << " end() \n" << std::flush; + para.end(); } diff --git a/guix-tools/python-exhale.scm b/guix-tools/python-exhale.scm new file mode 100644 index 0000000000000000000000000000000000000000..387a9017fd3e8304431318f1a1efdf2295410ef8 --- /dev/null +++ b/guix-tools/python-exhale.scm @@ -0,0 +1,57 @@ +(define-module (python-exhale) + #:use-module (guix) + #:use-module (guix packages) + #:use-module (guix download) + #:use-module (guix git-download) + #:use-module (guix hg-download) + #:use-module (guix gexp) + #:use-module (guix utils) + #:use-module (guix build-system python) + #:use-module (guix build-system pyproject) + #:use-module ((guix licenses) #:prefix license:) + #:use-module (gnu packages) + #:use-module (gnu packages certs) + #:use-module (gnu packages check) + #:use-module (gnu packages fonts) + #:use-module (gnu packages fontutils) + #:use-module (gnu packages graphviz) + #:use-module (gnu packages image) + #:use-module (gnu packages imagemagick) + #:use-module (gnu packages jupyter) + #:use-module (gnu packages python) + #:use-module (gnu packages sphinx) + #:use-module (gnu packages xml) + #:use-module (gnu packages python-build) + #:use-module (gnu packages python-check) + #:use-module (gnu packages python-crypto) + #:use-module (gnu packages python-web) + #:use-module (gnu packages python-xyz) + #:use-module (gnu packages time) + #:use-module (gnu packages python-science) + #:use-module (gnu packages graph) + #:use-module ((guix licenses) #:prefix license:) + #:use-module (gnu packages) + #:use-module (guix build-system gnu)) + +(define-public python-exhale + (package + (name "python-exhale") + (version "0.3.7") + (source + (origin + (method url-fetch) + (uri (pypi-uri "exhale" version)) + (sha256 + (base32 "1n5hsrg7swh535bd5b3f55ldcb343yld849kjcfm2mlllp89cakm")))) + (build-system pyproject-build-system) + (propagated-inputs (list python-beautifulsoup4 python-breathe python-lxml + python-six python-sphinx)) + (native-inputs (list python-setuptools python-wheel)) + (home-page "https://github.com/svenevs/exhale") + (synopsis + "Automatic C++ library API documentation generator using Doxygen, Sphinx, and") + (description + "Automatic C++ library API documentation generator using Doxygen, Sphinx, and.") + (license #f))) + +;; python-exhale diff --git a/guix-tools/scalfmm-manifest-clang-mkl.scm b/guix-tools/scalfmm-manifest-clang-mkl.scm index 285a3fce6e76c628f9f4ec9b6a4924a91d8d9de1..3dbff5ada11e18cf3d1a85768f66243e06740a2e 100644 --- a/guix-tools/scalfmm-manifest-clang-mkl.scm +++ b/guix-tools/scalfmm-manifest-clang-mkl.scm @@ -10,5 +10,6 @@ "ncurses" "intel-oneapi-mkl" "grep" + "openmpi" "findutils" "sed")) diff --git a/guix-tools/scalfmm-manifest-clang-openblas.scm b/guix-tools/scalfmm-manifest-clang-openblas.scm index 7dbe2c842d78332a5499a9eb2f5693717a6d34d2..dd5b7002628f647a91110c4aa3435b2b2e4fbad3 100644 --- 
a/guix-tools/scalfmm-manifest-clang-openblas.scm +++ b/guix-tools/scalfmm-manifest-clang-openblas.scm @@ -14,4 +14,5 @@ "pkg-config" "grep" "findutils" + "openmpi" "sed")) diff --git a/guix-tools/scalfmm-manifest-doc.scm b/guix-tools/scalfmm-manifest-doc.scm new file mode 100644 index 0000000000000000000000000000000000000000..5b84cd5d632593fcdfa2068c2e9af08590caf95f --- /dev/null +++ b/guix-tools/scalfmm-manifest-doc.scm @@ -0,0 +1,20 @@ +;; What follows is a "manifest" equivalent to the command line you gave. +;; You can store it in a file that you may then pass to any 'guix' command +;; that accepts a '--manifest' (or '-m') option. + +(specifications->manifest + (list "openblas" + "fftw" + "fftwf" + "cmake" + "make" + "gcc-toolchain" + "pkg-config" + "doxygen" + "coreutils" + "python" + "python-sphinx" + "python-recommonmark" + "python-breathe" + "python-sphinx-rtd-theme" + "python-exhale")) diff --git a/guix-tools/scalfmm-manifest-gcc-mkl.scm b/guix-tools/scalfmm-manifest-gcc-mkl.scm index 016062e39a3a9ce9ad384e5d93bbffa546fef988..c191baf58e1995ba21d85192aad633f7d9e585f0 100644 --- a/guix-tools/scalfmm-manifest-gcc-mkl.scm +++ b/guix-tools/scalfmm-manifest-gcc-mkl.scm @@ -11,4 +11,5 @@ "intel-oneapi-mkl" "grep" "findutils" + "openmpi" "sed")) diff --git a/guix-tools/scalfmm-manifest-gcc-openblas.scm b/guix-tools/scalfmm-manifest-gcc-openblas.scm index 3db24721bb79849d63f9ac7dee6e74e53c7bfe25..1619176d1008a1ad06353413ed26579ced2350b3 100644 --- a/guix-tools/scalfmm-manifest-gcc-openblas.scm +++ b/guix-tools/scalfmm-manifest-gcc-openblas.scm @@ -14,4 +14,5 @@ "pkg-config" "grep" "findutils" + "openmpi" "sed")) diff --git a/guix-tools/scalfmm-manifest-gcc11-openblas.scm b/guix-tools/scalfmm-manifest-gcc11-openblas.scm index 3db24721bb79849d63f9ac7dee6e74e53c7bfe25..1619176d1008a1ad06353413ed26579ced2350b3 100644 --- a/guix-tools/scalfmm-manifest-gcc11-openblas.scm +++ b/guix-tools/scalfmm-manifest-gcc11-openblas.scm @@ -14,4 +14,5 @@ "pkg-config" "grep" "findutils" + "openmpi" "sed")) diff --git a/guix-tools/scalfmm-manifest-gcc12-openblas.scm b/guix-tools/scalfmm-manifest-gcc12-openblas.scm index a7291f952ef762d63d4874a684aac2a8b2bbe00f..37f52784f54d4f89a839aa8f830c89d8351d62a5 100644 --- a/guix-tools/scalfmm-manifest-gcc12-openblas.scm +++ b/guix-tools/scalfmm-manifest-gcc12-openblas.scm @@ -14,4 +14,5 @@ "pkg-config" "grep" "findutils" + "openmpi" "sed")) diff --git a/guix-tools/scalfmm-manifest-gcc13-openblas.scm b/guix-tools/scalfmm-manifest-gcc13-openblas.scm index a6a02f26d7ed0cd31fffe7c93eb2588d31f45eb9..d6d2f99c3817b4a8922ef23498cb758ae1827337 100644 --- a/guix-tools/scalfmm-manifest-gcc13-openblas.scm +++ b/guix-tools/scalfmm-manifest-gcc13-openblas.scm @@ -14,4 +14,5 @@ "pkg-config" "grep" "findutils" + "openmpi" "sed")) diff --git a/guix-tools/scalfmm-manifest-gcc14-openblas.scm b/guix-tools/scalfmm-manifest-gcc14-openblas.scm index cb5b2d1f3c2c01e0f69739ab7843e256b1ad7f1c..2fc252c60a377b1b03bfe58106678be20e41e051 100644 --- a/guix-tools/scalfmm-manifest-gcc14-openblas.scm +++ b/guix-tools/scalfmm-manifest-gcc14-openblas.scm @@ -14,4 +14,5 @@ "pkg-config" "grep" "findutils" + "openmpi" "sed")) diff --git a/include/scalfmm/algorithms/mpi/direct.hpp b/include/scalfmm/algorithms/mpi/direct.hpp index d08d8fe4df9a7f5f177d7e5dfb14c4f00c0a702e..690c5e57df85a26d5a567b5deec63ae46604801b 100644 --- a/include/scalfmm/algorithms/mpi/direct.hpp +++ b/include/scalfmm/algorithms/mpi/direct.hpp @@ -23,7 +23,7 @@ namespace scalfmm::algorithms::mpi::pass * step 3 send/receive the particles * * 
@tparam TreeS - * + * * @param tree_source source tree (contains the particles) */ template<typename TreeS> @@ -33,134 +33,159 @@ namespace scalfmm::algorithms::mpi::pass auto comm = para.get_communicator(); auto rank = para.get_process_id(); auto nb_proc = para.get_num_processes(); - if(nb_proc == 1) + if(nb_proc > 1) { // Openmp case -> no communication - return; - } - // - using grp_access_type = std::pair<decltype(tree_source.begin_leaves()), int>; - using mortonIdx_type = std::int64_t; - // - std::vector<std::vector<grp_access_type>> leaf_to_receive_access(nb_proc); - std::vector<std::vector<grp_access_type>> leaf_to_send_access(nb_proc); - std::vector<std::vector<mortonIdx_type>> morton_to_receive(nb_proc); // TOREMOVE - std::vector<int> nb_messages_to_send(nb_proc, 0); - std::vector<int> nb_messages_to_receive(nb_proc, 0); - /// - auto begin_left_ghost = tree_source.begin_leaves(); - auto end_left_ghost = tree_source.begin_mine_leaves(); - auto begin_right_ghost = tree_source.end_mine_leaves(); - auto end_right_ghost = tree_source.end_leaves(); - // - //print leaf block - // for(auto it = end_left_ghost; it != begin_right_ghost; ++it) - // { - // std::cout << **it << std::endl; - // } - // - auto const& leaf_distribution = tree_source.get_leaf_distribution(); - - scalfmm::parallel::comm::start_step1(comm, begin_left_ghost, end_left_ghost, begin_right_ghost, - end_right_ghost, leaf_distribution, nb_messages_to_receive, - nb_messages_to_send, leaf_to_receive_access, morton_to_receive); - // for(auto p = 0; p < nb_proc; ++p) - // { - // io::print(" morton to receive[" + std::to_string(p) + "] ", morton_to_receive[p]); - // } - // io::print(" nb_messages_to_receive ", nb_messages_to_receive); - // io::print(" nb_messages_to_send ", nb_messages_to_send); - // - std::vector<std::vector<mortonIdx_type>> morton_to_send(nb_proc); - // - scalfmm::parallel::comm::start_step2(nb_proc, rank, comm, nb_messages_to_receive, nb_messages_to_send, - morton_to_receive, morton_to_send); - - ///////////////////////////////////////////////////////////////////////////////// - /// STEP 3 - ///////////////////////////////////////////////////////////////////////////////// - // send the particles - // morton_to_send list des indices de Morton. - // leaf_to_send_access (ptr on the group and index into the group) - // - auto begin_grp = tree_source.begin_mine_leaves(); - auto end_grp = tree_source.end_mine_leaves(); - - scalfmm::parallel::comm::build_direct_access_to_leaf(nb_proc, begin_grp, end_grp, leaf_to_send_access, - morton_to_send); - // - // Build the mpi type for the particles - // - static constexpr std::size_t dimension = TreeS::base_type::leaf_type::dimension; - static constexpr std::size_t inputs_size = TreeS::base_type::leaf_type::inputs_size; - - using position_coord_type = typename TreeS::base_type::leaf_type::position_coord_type; - using inputs_type_ori = typename TreeS::base_type::leaf_type::inputs_type; - - static_assert(!meta::is_complex_v<inputs_type_ori>, "input complex type not yet supported."); - using inputs_type1 = inputs_type_ori; - using inputs_type = std::conditional_t<meta::is_complex_v<inputs_type_ori>, - meta::has_value_type_t<inputs_type_ori>, inputs_type_ori>; - // for complex value (2) otherwise 1 NOT YET USED for particles - int nb_input_values = meta::is_complex_v<inputs_type_ori> ? 
2 : 1; - - auto mpi_position_type = cpp_tools::parallel_manager::mpi::get_datatype<position_coord_type>(); - auto mpi_input_type = cpp_tools::parallel_manager::mpi::get_datatype<inputs_type>(); - // - - // build and commit the MPI type of the particle to send - // std::cout << "=================== Send type ========================\n"; - - auto particle_type_to_send = scalfmm::parallel::comm::build_mpi_particles_type<dimension>( - leaf_to_send_access, inputs_size, mpi_position_type, mpi_input_type); - - // send the particles - for(auto p = 0; p < nb_proc; ++p) - { - if(leaf_to_send_access[p].size() != 0) + + // + using grp_access_type = std::pair<decltype(tree_source.begin_leaves()), int>; + using mortonIdx_type = std::int64_t; + // + std::vector<std::vector<grp_access_type>> leaf_to_receive_access(nb_proc); + std::vector<std::vector<grp_access_type>> leaf_to_send_access(nb_proc); + std::vector<std::vector<mortonIdx_type>> morton_to_receive(nb_proc); // TOREMOVE + std::vector<int> nb_messages_to_send(nb_proc, 0); + std::vector<int> nb_messages_to_receive(nb_proc, 0); + /// + auto begin_left_ghost = tree_source.begin_leaves(); + auto end_left_ghost = tree_source.begin_mine_leaves(); + auto begin_right_ghost = tree_source.end_mine_leaves(); + auto end_right_ghost = tree_source.end_leaves(); + // + //print leaf block + // for(auto it = end_left_ghost; it != begin_right_ghost; ++it) + // { + // std::cout << **it << std::endl; + // } + // + auto const& leaf_distribution = tree_source.get_leaf_distribution(); + + scalfmm::parallel::comm::start_step1(comm, begin_left_ghost, end_left_ghost, begin_right_ghost, + end_right_ghost, leaf_distribution, nb_messages_to_receive, + nb_messages_to_send, leaf_to_receive_access, morton_to_receive); + // for(auto p = 0; p < nb_proc; ++p) + // { + // io::print(" morton to receive[" + std::to_string(p) + "] ", morton_to_receive[p]); + // } + // io::print(" nb_messages_to_receive ", nb_messages_to_receive); + // io::print(" nb_messages_to_send ", nb_messages_to_send); + // + std::vector<std::vector<mortonIdx_type>> morton_to_send(nb_proc); + // + scalfmm::parallel::comm::start_step2(nb_proc, rank, comm, nb_messages_to_receive, nb_messages_to_send, + morton_to_receive, morton_to_send); + + ///////////////////////////////////////////////////////////////////////////////// + /// STEP 3 + ///////////////////////////////////////////////////////////////////////////////// + // send the particles + // morton_to_send list des indices de Morton. 
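For reference, the step-3 code that follows posts one non-blocking send and one non-blocking receive per peer rank, each using a committed derived datatype that addresses the leaf storage directly (hence `MPI_BOTTOM` with a count of 1), and then waits on the receptions. Below is a minimal plain-MPI sketch of that pattern; the helper name `exchange_ghost_particles` and the per-rank `send_type`/`recv_type` vectors are illustrative assumptions, and the actual pass goes through the `cpp_tools::parallel_manager` wrappers and waits only on the receive requests.

```cpp
#include <mpi.h>

#include <vector>

// Hypothetical helper: one committed derived datatype per peer rank, describing the
// particle data at absolute addresses (hence MPI_BOTTOM below). An entry equal to
// MPI_DATATYPE_NULL means "nothing to exchange with that rank".
void exchange_ghost_particles(MPI_Comm comm, std::vector<MPI_Datatype> const& send_type,
                              std::vector<MPI_Datatype> const& recv_type, int tag = 777)
{
    int nb_proc{};
    MPI_Comm_size(comm, &nb_proc);

    std::vector<MPI_Request> requests;
    requests.reserve(2 * nb_proc);

    for(int p = 0; p < nb_proc; ++p)
    {
        if(send_type[p] != MPI_DATATYPE_NULL)
        {
            requests.emplace_back();
            MPI_Isend(MPI_BOTTOM, 1, send_type[p], p, tag, comm, &requests.back());
        }
        if(recv_type[p] != MPI_DATATYPE_NULL)
        {
            requests.emplace_back();
            MPI_Irecv(MPI_BOTTOM, 1, recv_type[p], p, tag, comm, &requests.back());
        }
    }
    // The pass above waits only on the receptions; for simplicity we wait on everything here.
    MPI_Waitall(static_cast<int>(requests.size()), requests.data(), MPI_STATUSES_IGNORE);
}
```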
+ // leaf_to_send_access (ptr on the group and index into the group) + // + auto begin_grp = tree_source.begin_mine_leaves(); + auto end_grp = tree_source.end_mine_leaves(); + + scalfmm::parallel::comm::build_direct_access_to_components(nb_proc, begin_grp, end_grp, + leaf_to_send_access, morton_to_send); + // + // Build the mpi type for the particles + // + static constexpr std::size_t dimension = TreeS::base_type::leaf_type::dimension; + static constexpr std::size_t inputs_size = TreeS::base_type::leaf_type::inputs_size; + + using position_coord_type = typename TreeS::base_type::leaf_type::position_coord_type; + using inputs_type_ori = typename TreeS::base_type::leaf_type::inputs_type; + + static_assert(!meta::is_complex_v<inputs_type_ori>, "input complex type not yet supported."); + using inputs_type1 = inputs_type_ori; + using inputs_type = std::conditional_t<meta::is_complex_v<inputs_type_ori>, + meta::has_value_type_t<inputs_type_ori>, inputs_type_ori>; + // for complex value (2) otherwise 1 NOT YET USED for particles + int nb_input_values = meta::is_complex_v<inputs_type_ori> ? 2 : 1; + + auto mpi_position_type = cpp_tools::parallel_manager::mpi::get_datatype<position_coord_type>(); + auto mpi_input_type = cpp_tools::parallel_manager::mpi::get_datatype<inputs_type>(); + // + + // build and commit the MPI type of the particle to send + // std::cout << "=================== Send type ========================\n"; + + auto particle_type_to_send = scalfmm::parallel::comm::build_mpi_particles_type<dimension>( + leaf_to_send_access, inputs_size, mpi_position_type, mpi_input_type); + + // send the particles + for(auto p = 0; p < nb_proc; ++p) { - comm.isend(MPI_BOTTOM, 1, particle_type_to_send[p], p, 777); + if(leaf_to_send_access[p].size() != 0) + { + comm.isend(MPI_BOTTOM, 1, particle_type_to_send[p], p, 777); + } } - } - // - // receive the particle - std::vector<cpp_tools::parallel_manager::mpi::request> recept_mpi_status; - // build and commit the MPI type of the particle to receive - // std::cout << "=================== Receive type ========================\n"; - - auto particle_type_to_receive = scalfmm::parallel::comm::build_mpi_particles_type<dimension>( - leaf_to_receive_access, inputs_size, mpi_position_type, mpi_input_type); - - for(auto p = 0; p < nb_proc; ++p) - { - if(leaf_to_receive_access[p].size() != 0) + // + // receive the particle + std::vector<cpp_tools::parallel_manager::mpi::request> recept_mpi_status; + // build and commit the MPI type of the particle to receive + // std::cout << "=================== Receive type ========================\n"; + + auto particle_type_to_receive = scalfmm::parallel::comm::build_mpi_particles_type<dimension>( + leaf_to_receive_access, inputs_size, mpi_position_type, mpi_input_type); + + for(auto p = 0; p < nb_proc; ++p) { - recept_mpi_status.push_back(comm.irecv(MPI_BOTTOM, 1, particle_type_to_receive[p], p, 777)); + if(leaf_to_receive_access[p].size() != 0) + { + recept_mpi_status.push_back(comm.irecv(MPI_BOTTOM, 1, particle_type_to_receive[p], p, 777)); + } } - } - if(recept_mpi_status.size() > 0) - { - cpp_tools::parallel_manager::mpi::request::waitall(recept_mpi_status.size(), recept_mpi_status.data()); + if(recept_mpi_status.size() > 0) + { + cpp_tools::parallel_manager::mpi::request::waitall(recept_mpi_status.size(), + recept_mpi_status.data()); + } + + //print leaf block + // std::cout << "==========================================================\n"; + // int id_group{0}; + // for(auto ptr_group = begin_left_ghost; ptr_group != 
end_right_ghost; ++ptr_group) + // { + // auto const& current_group_symbolics = (*ptr_group)->csymbolics(); + + // std::cout << "*** Group of leaf index " << ++id_group << " *** index in [" + // << current_group_symbolics.starting_index << ", " << current_group_symbolics.ending_index + // << "["; + // std::cout << ", is_mine: " << std::boolalpha << current_group_symbolics.is_mine << "\n"; + // std::cout << " group size: " << current_group_symbolics.number_of_component_in_group << ", "; + // std::cout << "global index = " << current_group_symbolics.idx_global << " \n"; + // std::cout << " index: "; + // (*ptr_group)->cstorage().print_block_data(std::cout); + // } } - //print leaf block - // std::cout << "==========================================================\n"; - // int id_group{0}; - // for(auto ptr_group = begin_left_ghost; ptr_group != end_right_ghost; ++ptr_group) - // { - // auto const& current_group_symbolics = (*ptr_group)->csymbolics(); - - // std::cout << "*** Group of leaf index " << ++id_group << " *** index in [" - // << current_group_symbolics.starting_index << ", " << current_group_symbolics.ending_index - // << "["; - // std::cout << ", is_mine: " << std::boolalpha << current_group_symbolics.is_mine << "\n"; - // std::cout << " group size: " << current_group_symbolics.number_of_component_in_group << ", "; - // std::cout << "global index = " << current_group_symbolics.idx_global << " \n"; - // std::cout << " index: "; - // (*ptr_group)->cstorage().print_block_data(std::cout); - // } - } // en d start_communications + // #ifndef _DEBUG_BLOCK_DATA + // std::clog << " FINAl block\n"; + // int id_group{0}; + // auto group_of_leaves = tree_source.vector_of_leaf_groups(); + // for(auto pg: group_of_leaves) + // { + // auto const& current_group_symbolics = pg->csymbolics(); + // std::cout << "*** Group of leaf index " << ++id_group << " *** index in [" + // << current_group_symbolics.starting_index << ", " << current_group_symbolics.ending_index + // << "["; + // std::cout << ", is_mine: " << std::boolalpha << current_group_symbolics.is_mine << "\n"; + // std::cout << " group size: " << current_group_symbolics.number_of_component_in_group << ", "; + // std::cout << "global index = " << current_group_symbolics.idx_global << " \n" << std::flush; + // // std::cout << " index: "; + // // std::clog << "block index " << tt++ << std::endl; + // pg->cstorage().print_block_data(std::clog); + // } + // std::clog << " ---------------------------------------------------\n"; + // #endif + } // end start_communications } // namespace comm + // template<typename Tree, typename NearField> + // inline auto direct_mine_ghost(Tree const& tree, NearField const& nearfield) -> void + // { + // } /** * @brief Compute direct interaction between particles * @@ -181,7 +206,11 @@ namespace scalfmm::algorithms::mpi::pass comm::start_communications(tree_source); // std::cout << " end comm " << std::endl << std::flush; // std::cout << "&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&" << std::endl; - scalfmm::algorithms::omp::pass::direct(tree_source, tree_target, near_field); + omp::pass::direct(tree_source, tree_target, near_field); + // if(nearfield.mutual()) + // { + // direct_mine_ghost(tree_target, near_field); + // } } } // namespace scalfmm::algorithms::mpi::pass diff --git a/include/scalfmm/algorithms/mpi/downward.hpp b/include/scalfmm/algorithms/mpi/downward.hpp index 71d648a16148e64158b05bb6906ce800cf96aa2a..1ed5e10910fb96fa29555f6cf0573c5b92512109 100644 --- 
a/include/scalfmm/algorithms/mpi/downward.hpp +++ b/include/scalfmm/algorithms/mpi/downward.hpp @@ -7,16 +7,47 @@ #include "scalfmm/operators/l2l.hpp" #ifdef _OPENMP - -#include <omp.h> - #include "scalfmm/algorithms/omp/downward.hpp" +#include <omp.h> #endif // _OPENMP #include <cpp_tools/parallel_manager/parallel_manager.hpp> namespace scalfmm::algorithms::mpi::pass { + /// @brief Construct the vector of dependencies (child group) + /// @tparam IteratorType + /// @tparam MortonType + /// @tparam Dependencies_t + /// @tparam dimension + /// @param begin first iterator on the groups of cells (child) + /// @param end last iterator on the groups of cells (child) + /// @param parent_morton_index the parent index + /// @param dependencies the vector of dependencies + template<int dimension, typename IteratorType, typename MortonType, typename Dependencies_t> + void build_downward_dependencies(IteratorType begin, IteratorType end, MortonType const& parent_morton_index, + Dependencies_t& dependencies) + { + for(auto grp_ptr = begin; grp_ptr != end; ++grp_ptr) + { + auto const& csymb = (*grp_ptr)->csymbolics(); + // iterate on the cells in the same group + // we move forward in the index vector + // std::cout << "[" << csymb.starting_index << " < " << parent_morton_index << " < " << csymb.ending_index + // << "] ?" << std::endl + // << std::flush; + if(parent_morton_index == ((csymb.ending_index - 1) >> dimension)) + { + // std::cout << parent_morton_index << " add depend for grp with Int [" << csymb.starting_index << ", " + // << csymb.ending_index << "]" << std::endl; + dependencies.push_back(&(grp_ptr->get()->ccomponent(0).clocals(0))); + } + else + { + break; + } + } + } /** * @brief perform the l2l communications for the father level * @@ -28,86 +59,169 @@ namespace scalfmm::algorithms::mpi::pass template<typename Tree> inline auto downward_communications_level(const int& level, Tree& tree) -> void { + // value_type value of the local array using value_type = typename Tree::base_type::cell_type::value_type; - static constexpr int nb_inputs = Tree::cell_type::storage_type::inputs_size; - static constexpr std::size_t dimension = Tree::base_type::box_type::dimension; + using dep_type = typename Tree::group_of_cell_type::symbolics_type::ptr_multi_dependency_type; + static constexpr int nb_outputs = Tree::cell_type::storage_type::outputs_size; + static constexpr std::size_t dimension = Tree::base_type::box_type::dimension; + // number of theoretical children + constexpr int nb_children = math::pow(2, dimension); + static constexpr auto prio{omp::priorities::max}; + // auto child_level = level + 1; - auto const& distrib = tree.get_cell_distribution(child_level); - // compute the size of the multipoles to send (generic) versus math::pow(order, dimension) - auto it_group = tree.end_mine_cells(level) - 1; // last group the I own - auto pos = it_group->get()->size() - 1; // index of the last cell in the group - auto const& cell = it_group->get()->component(pos); // the cell - - auto const& m = cell.cmultipoles(); - auto size{int(nb_inputs * m.at(0).size())}; + // compute the size of the locals to send (generic) versus math::pow(order, dimension) + auto it_last_parent_group = tree.end_mine_cells(level) - 1; // last group I own father + auto pos = it_last_parent_group->get()->size() - 1; // index of the last cell + auto const& cell = it_last_parent_group->get()->component(pos); // the cell + auto const& m = cell.clocals(); + auto size_local{int(nb_outputs * m.at(0).size())}; // size of a local + // // 
For the communications auto& para = tree.get_parallel_manager(); - auto comm = para.get_communicator(); - auto rank = comm.rank(); - int nb_proc = comm.size(); + auto* comm = &(para.get_communicator()); + auto rank = comm->rank(); + int nb_proc = comm->size(); int tag_data = 2201 + 10 * level; - - // Send + std::vector<dep_type> dependencies_in; + // + auto ptr_tree = &tree; + auto const& distrib = tree.get_cell_distribution(child_level); + // std::clog << "distrib me [" << distrib[rank][0] << "," << distrib[rank][1] << "]\n"; + // Send to the right the last locals if(rank != nb_proc - 1) { + // std::clog << " Send step " << level << "\n"; + // get the distribution at child level auto last_child_index = distrib[rank][1] - 1; auto first_child_index_after_me = distrib[rank + 1][0]; - // dependencies in on th group - if((last_child_index >> dimension) == (first_child_index_after_me >> dimension)) + // dependencies in on the group + // Check if the last mine and the first right ghost have the same father + auto parent_morton_index = last_child_index >> dimension; + // std::clog << " downward last_child_index " << last_child_index << " its parent " << parent_morton_index + // << " first_child_index_after_me " << first_child_index_after_me << " its parent " + // << (first_child_index_after_me >> dimension) << std::endl + // << std::flush; + if(parent_morton_index == (first_child_index_after_me >> dimension)) { - std::vector<value_type> buffer(size); + // Two processes share the same parent + // iterator on the my first child + auto first_group_of_child = tree.begin_mine_cells(child_level); + auto first_index_child = first_group_of_child->get()->component(0).index(); + auto parent_of_last_index_child = first_index_child >> dimension; - // I have to send a message from my right to update the multipoles of the first - // cells of the right ghosts. - // temporary buffer + std::cout << std::flush; + // dependencies on the parent group + auto dep_parent = &(it_last_parent_group->get()->ccomponent(0).clocals(0)); + // std::cout << " downward dep(in) on groupe dep_parent " << dep_parent << std::endl << std::flush; + // depend(iterator(std::size_t it = 0 dependencies.size()), inout : (dependencies[it])[0]), - auto nb_m = m.size(); - auto it = std::begin(buffer); - for(std::size_t i{0}; i < nb_m; ++i) +#pragma omp task default(none) firstprivate(comm, rank, tag_data, it_last_parent_group, last_child_index) \ + shared(std::clog) depend(in : dep_parent[0], ptr_tree[0]) priority(prio) { - auto const& ten = m.at(i); - std::copy(std::begin(ten), std::end(ten), it); - it += ten.size(); - } - auto mpi_type = cpp_tools::parallel_manager::mpi::get_datatype<value_type>(); + // I have to send a message from my right to update the multipoles of the first + // cells of the right ghosts. 
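The send branch above fires only when the last child cell owned by this rank and the first child cell owned by the next rank share a parent, i.e. when their Morton indices coincide once the `dimension` low bits are shifted out. A self-contained restatement of that test follows; the `std::array`-based distribution type and the half-open `[first, last)` intervals are assumptions inferred from the `distrib[rank][1] - 1` usage above.

```cpp
#include <array>
#include <cstddef>
#include <cstdint>
#include <vector>

// Decide whether rank `rank` must send its last parent's locals to rank + 1 at this level.
// distrib[r] = {first, last} is the Morton interval owned by rank r at the *child* level.
// Only meaningful when rank is not the last process.
bool shares_parent_with_right_neighbour(std::vector<std::array<std::int64_t, 2>> const& distrib,
                                        int rank, std::size_t dimension)
{
    auto last_child_index = distrib[rank][1] - 1;            // last Morton index I own
    auto first_child_index_after_me = distrib[rank + 1][0];  // first Morton index of my right neighbour
    // Two children share a parent iff their Morton indices agree after shifting out the
    // `dimension` low bits (the child's position inside its parent).
    return (last_child_index >> dimension) == (first_child_index_after_me >> dimension);
}
```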
+ // temporary buffer + auto pos = it_last_parent_group->get()->size() - 1; + auto const& cell = it_last_parent_group->get()->component(pos); // the cell - comm.isend(buffer.data(), size, mpi_type, rank + 1, tag_data); - } - } + auto const& m = cell.clocals(); + auto size_local{int(nb_outputs * m.at(0).size())}; + auto nb_m = m.size(); + std::vector<value_type> buffer(size_local); + + // std::cout << "cell index: " << cell.index() << " = parent " << (last_child_index >> dimension) + // << "\n"; + // loop to serialize the locals + auto it = std::begin(buffer); + for(std::size_t i{0}; i < nb_m; ++i) + { + auto const& ten = m.at(i); + std::copy(std::begin(ten), std::end(ten), it); + it += ten.size(); + } + // io::print("buffer(send) ", buffer); + + auto mpi_type = cpp_tools::parallel_manager::mpi::get_datatype<value_type>(); + // std::clog << " send buffer to " << rank + 1 << std::endl; + comm->isend(buffer.data(), size_local, mpi_type, rank + 1, tag_data); + // std::cout << " downward(task_send) buffer(rank=" << std::to_string(rank) << "): " << std::flush; + // for(int i = 0; i < buffer.size(); ++i) + // { + // std::cout << " " << buffer[i] << std::flush; + // } + // std::cout << std::endl << std::flush; + } // end task + } // end same parent + } // end rank !- proc -1 // Receive if(rank > 0) { + // std::clog << "Receive step\n"; + auto last_child_index_before_me = distrib[rank - 1][1] - 1; auto first_child_index = distrib[rank][0]; // dependencies out on the group - + // check if same parent + // std::clog << "downward receive comm last_child_index_before_me " << last_child_index_before_me + // << " parent " << (last_child_index_before_me >> dimension) << " first_child_index " + // << first_child_index << " its parent " << (first_child_index >> dimension) << std::endl + // << std::flush; if((last_child_index_before_me >> dimension) == (first_child_index >> dimension)) { - std::vector<value_type> buffer(size); - - // I have to receive a message from my left to update the multipoles of the last - // cells of the left ghosts. 
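On both sides of this exchange the locals of a single cell are serialized tensor by tensor into a flat buffer and scattered back on reception, exactly as the copy loops above and below do. A generic sketch of that packing, assuming hypothetical `clocals()`/`locals()` accessors that return a container of ranges as in this pass:

```cpp
#include <algorithm>
#include <iterator>
#include <vector>

// Serialize all local expansions of a cell into one contiguous buffer (sender side).
template<typename ValueType, typename Cell>
auto pack_locals(Cell const& cell) -> std::vector<ValueType>
{
    std::vector<ValueType> buffer;
    for(auto const& tensor: cell.clocals())
    {
        buffer.insert(std::end(buffer), std::begin(tensor), std::end(tensor));
    }
    return buffer;
}

// Scatter a received buffer back into the locals of a ghost cell (receiver side).
template<typename ValueType, typename Cell>
void unpack_locals(std::vector<ValueType> const& buffer, Cell& ghost_cell)
{
    auto it = std::begin(buffer);
    for(auto& tensor: ghost_cell.locals())
    {
        std::copy(it, it + tensor.size(), std::begin(tensor));
        it += tensor.size();
    }
}
```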
- auto mpi_type = cpp_tools::parallel_manager::mpi::get_datatype<value_type>(); - comm.recv(buffer.data(), size, mpi_type, rank - 1, tag_data); - - /// set the multipoles in the ghost - auto it_group = tree.begin_mine_cells(level) - 1; // last left ghosts - auto pos = it_group->get()->size() - 1; // index of the last cell in the group - auto& cell = it_group->get()->component(pos); - auto& m = cell.multipoles(); - - auto nb_m = m.size(); - auto it = std::begin(buffer); - for(std::size_t i{0}; i < nb_m; ++i) + // task to do + // std::cout << " downward receive task to do perform " << std::endl << std::flush; + + // dependencies on left ghost parent + auto gs = it_last_parent_group->get()->size(); + // std::cout << " gs = " << gs << std::endl << std::flush; + int nb_grp_dep = + std::min(static_cast<int>(nb_children / gs + 1), + static_cast<int>(std::distance(it_last_parent_group, tree.end_cells(child_level)))); + auto it_last_parent_group = tree.begin_mine_cells(level) - 1; + auto dep_ghost_parent = &(it_last_parent_group->get()->ccomponent(0).clocals(0)); + // std::cout << " downward(receive) dependencies(out): " << dep_ghost_parent << std::endl << std::flush; + +#pragma omp task default(none) firstprivate(comm, rank, tag_data, size_local, it_last_parent_group) shared(std::clog) \ + depend(out : dep_ghost_parent[0], ptr_tree[0]) priority(prio) { - auto& ten = m.at(i); - std::copy(it, it + ten.size(), std::begin(ten)); - it += ten.size(); - } - } - } + // std::clog << " Same parent\n "; + // Same parent, I have to receive a message from my left + // to update the locals of the last cells of the left ghosts. + std::vector<value_type> buffer(size_local); + // blocking receive ( We block the submission of L2L tasks at this level ) + auto mpi_type = cpp_tools::parallel_manager::mpi::get_datatype<value_type>(); + comm->recv(buffer.data(), size_local, mpi_type, rank - 1, tag_data); + // std::cout << " downward(task receive) buffer:"; + // for(int i = 0; i < buffer.size(); ++i) + // { + // std::cout << " " << buffer[i]; + // } + // std::cout << std::endl; + /// set the locals in the last left ghosts and in last cell + auto it_group = it_last_parent_group; + auto pos = it_group->get()->size() - 1; // index of the last cell in the group + auto& cell = it_group->get()->component(pos); + auto& m = cell.locals(); + // std::clog << "cell index: " << cell.index() << " = parent " << (cell.index() >> dimension) << "\n"; + + auto nb_m = m.size(); + // std::cout << " cell index: " << cell.index() << " level " << cell.csymbolics().level << "\n"; + // io::print("buffer(recv) ", buffer); + auto it = std::begin(buffer); + for(std::size_t i{0}; i < nb_m; ++i) + { + auto& ten = m.at(i); + // std::cout << " ten before " << ten << std::endl; + std::copy(it, it + ten.size(), std::begin(ten)); + // std::transform(it, it + ten.size(), std::begin(ten), std::begin(ten), std::plus<>{}); + // std::cout << " ten after " << ten << std::endl; + it += ten.size(); + } + } // end task + } // end same parent + } // end rank > 0 } /** @@ -128,8 +242,14 @@ namespace scalfmm::algorithms::mpi::pass for(std::size_t level = top_height; level < leaf_level; ++level) { + // std::cout << " L2L downward : " << level << " -> " << level + 1 << std::endl << std::flush; + // std::cout << "&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&" << std::endl; + // update the ghost at the current level (father) downward_communications_level(level, tree); + // std::cout << " end downward comm " << level << std::endl << std::flush; + // 
std::cout << "&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&" << std::endl; + // compute at the level omp::pass::downward_level(level, tree, approximation); } } diff --git a/include/scalfmm/algorithms/mpi/proc_task.hpp b/include/scalfmm/algorithms/mpi/proc_task.hpp index bea9aef9bc125696b0479c474b996dba68e02f1f..798cc5ead69f54bea23c27c9e8f3c5c186b20ab5 100644 --- a/include/scalfmm/algorithms/mpi/proc_task.hpp +++ b/include/scalfmm/algorithms/mpi/proc_task.hpp @@ -74,7 +74,7 @@ namespace scalfmm::algorithms::mpi << "WARNING the task priorities are not (fully) available. set OMP_MAX_TASK_PRIORITY to " << omp::priorities::max + 1 << cpp_tools::colors::reset << std::endl; } - + tree_target.get_parallel_manager().get_communicator().barrier(); const auto op = tree_target.height() == 2 ? operators_to_proceed::p2p : op_in; if constexpr(options::has(s, options::timit)) @@ -94,18 +94,23 @@ namespace scalfmm::algorithms::mpi } if((op & operators_to_proceed::p2p) == operators_to_proceed::p2p) { + // std::cout << "pass::direct \n"; pass::direct(tree_source, tree_target, near_field); } + if(tree_target.is_interaction_m2l_lists_built() == false) { list::omp::build_m2l_interaction_list(tree_source, tree_target, separation_criterion); } + if((op & operators_to_proceed::p2m) == operators_to_proceed::p2m) { + // std::cout << "pass::leaf_to_cell \n"; scalfmm::algorithms::omp::pass::leaf_to_cell(tree_source, far_field); } if((op & operators_to_proceed::m2m) == operators_to_proceed::m2m) { + // std::cout << "pass::upward \n"; pass::upward(tree_source, far_field.approximation()); } // if(same_tree && tree_target.box().is_periodic()) @@ -116,16 +121,21 @@ namespace scalfmm::algorithms::mpi // } if((op & operators_to_proceed::m2l) == operators_to_proceed::m2l) { + // std::cout << "pass::transfer remove_leaf_level \n"; pass::transfer(tree_source, tree_target, far_field, buffers, scalfmm::algorithms::omp::pass::split_m2l::remove_leaf_level); } if((op & operators_to_proceed::l2l) == operators_to_proceed::l2l) { + // std::cout << "pass::downward\n"; + pass::downward(tree_target, far_field.approximation()); } if((op & operators_to_proceed::m2l) == operators_to_proceed::m2l) { + // std::cout << "pass::transfer leaf_level \n"; + pass::transfer(tree_source, tree_target, far_field, buffers, scalfmm::algorithms::omp::pass::split_m2l::leaf_level); } @@ -136,6 +146,26 @@ namespace scalfmm::algorithms::mpi } } // end parallel +#ifdef _DEBUG_BLOCK_DATA + std::clog << "\n"; + std::clog << " FINAl block (end proc_task)\n"; + int id_group{0}; + auto group_of_leaves = tree_source.vector_of_leaf_groups(); + for(auto pg: group_of_leaves) + { + auto const& current_group_symbolics = pg->csymbolics(); + std::clog << "*** Group of leaf index " << ++id_group << " *** index in [" + << current_group_symbolics.starting_index << ", " << current_group_symbolics.ending_index + << "["; + std::clog << ", is_mine: " << std::boolalpha << current_group_symbolics.is_mine << "\n"; + std::clog << " group size: " << current_group_symbolics.number_of_component_in_group << ", "; + std::clog << "global index = " << current_group_symbolics.idx_global << " \n" << std::flush; + // std::cout << " index: "; + // std::clog << "block index " << tt++ << std::endl; + pg->cstorage().print_block_data(std::clog); + } + std::clog << " ---------------------------------------------------\n"; +#endif scalfmm::algorithms::omp::impl::delete_buffers(buffers); if constexpr(options::has(s, options::timit)) diff --git 
a/include/scalfmm/algorithms/mpi/transfer.hpp +++ b/include/scalfmm/algorithms/mpi/transfer.hpp @@ -1,12 +1,18 @@ // -------------------------------- // See LICENCE file at project root // File : scalfmm/algorithms/mpi/transfer.hpp // -------------------------------- #ifndef SCALFMM_ALGORITHMS_MPI_TRANSFER_HPP #define SCALFMM_ALGORITHMS_MPI_TRANSFER_HPP #ifdef _OPENMP +#include <iostream> +#include <map> +#include <omp.h> +#include <ostream> +#include <utility> + #include "scalfmm/algorithms/omp/transfer.hpp" #include "scalfmm/meta/traits.hpp" #include "scalfmm/operators/tags.hpp" @@ -19,12 +25,596 @@ #include <cpp_tools/parallel_manager/parallel_manager.hpp> -#include <map> -#include <omp.h> -#include <utility> - namespace scalfmm::algorithms::mpi::pass { + template<typename ContainerType> + void print_access(std::ostream& out, ContainerType& access) + { + // for(auto p = 0; p < cells_to_access.size(); ++p) + // { + // auto& access = cells_to_access[p]; + + out << "task_send_multipole_at_level for process size " << access.size() << std::endl << std::flush; + for(auto i = 0; i < access.size(); ++i) + { + out << " task_send " << i << " ptr " << std::flush << access[i].first->get() << " morton " + << (*(access[i].first))->component(access[i].second).csymbolics().morton_index << std::flush + << "multipole " << (*(access[i].first))->component(access[i].second).transfer_multipoles().at(0) + << std::endl + << std::flush; + } + out << "---------------------------" << std::endl << std::flush; + // } + } + /** + * @brief Build the buffer of multipoles (no longer used) + * + * @tparam IteratorType + * @tparam VectorOfVectorMortonType + * @tparam BufferType + */ + template<typename IteratorType, typename VectorOfVectorMortonType, typename BufferType> + auto build_buffer_func(IteratorType first_group_ghost, IteratorType last_group_ghost, + VectorOfVectorMortonType const& index_to_send, BufferType& buffer) + { + try + { + // std::cout << " ----- build_buffer ----\n" <<std::flush; + // std::cout << " ----- buffer size " <<buffer.size() <<std::endl<<std::flush; + // std::cout << " ----- index_to_send size " <<index_to_send.size() <<std::endl<<std::flush; + int idx{0}; + int max_idx = index_to_send.size(); // loop on the groups + auto it = std::begin(buffer); + for(auto grp_ptr = first_group_ghost; grp_ptr != last_group_ghost; ++grp_ptr) + { + int start_grp{0}; + + auto const& csymb = (*grp_ptr)->csymbolics(); + // iterate on the cells + while(idx < max_idx and math::between(index_to_send[idx], csymb.starting_index, csymb.ending_index)) + { // find cell inside the group + int pos{-1}; + for(int i = start_grp; i < (*grp_ptr)->size(); ++i) + { + auto morton = (*grp_ptr)->component(i).csymbolics().morton_index; + if(index_to_send[idx] == morton) + { + pos = i; + start_grp = i + 1; + // std::cout << " pos = " << pos << std::endl; + break; + } + } + // std::cout << " morton to find " << index_to_send[idx] << " cell found " + // << (*grp_ptr)->component(pos).csymbolics().morton_index << '\n'; + auto const& cell = (*grp_ptr)->component(pos); + auto const& m = cell.ctransfer_multipoles(); + auto nb_m = m.size(); + // std::cout << " nb_m" << m.size() <<std::endl; + for(std::size_t i{0}; i < nb_m; ++i) + { + auto const& ten = m.at(i); + std::copy(std::begin(ten), std::end(ten), it); + it += ten.size(); +
} + ++idx; + } + } + // std::cout << " ----- build_buffer ----\n" <<std::flush; + } + catch(std::exception& e) + { + std::cout << " error in buffer building !!!!!!!!!\n"; + std::cout << e.what() << '\n' << std::flush; + } + } + template<typename BufferType, typename AccessType> + auto build_buffer(const int nb_inputs, AccessType const& cells_to_send_access) -> BufferType + { + BufferType buffer; + try + { + //number of cells x nb inputs x size of an input + + auto const& cell = (cells_to_send_access[0].first->get())->component(cells_to_send_access[0].second); + const int multipoleSize{int(cell.transfer_multipoles().at(0).size())}; + + const int buffer_size{int(cells_to_send_access.size()) * nb_inputs * multipoleSize}; + buffer.resize(buffer_size); + std::cout << " ----- build_buffer ----\n" << std::flush; + std::cout << " ----- buffer size " << buffer.size() << std::endl << std::flush; + auto it = std::begin(buffer); + // iterate on the cells + for(auto access: cells_to_send_access) + { + auto const& cell = (*(access.first))->component(access.second); + + // std::cout << " morton to find " << index_to_send[idx] << " cell found " + // << (*grp_ptr)->component(pos).csymbolics().morton_index << '\n'; + auto const& m = cell.transfer_multipoles(); + auto nb_m = m.size(); + // std::cout << " nb_m" << m.size() <<std::endl; + for(std::size_t i{0}; i < nb_m; ++i) + { + auto const& ten = m.at(i); + std::copy(std::begin(ten), std::end(ten), it); + it += ten.size(); + } + } + std::cout << " ----- build_buffer ----\n" << std::flush; + // io::print("buffer: ", buffer); + } + catch(std::exception& e) + { + std::cout << " error in buffer building !!!!!!!!!\n"; + std::cout << e.what() << '\n' << std::flush; + } + return buffer; + } + + /// @brief Perform the communications to send the multipoles + + /// @tparam valueType + /// @tparam TreeS the tree type + /// @tparam VectorOfVectorMortonType + /// @param level the level in the tree + /// @param tree the tree containing the multipole to send + /// @param morton_to_send the morton index of the cells containing the multipole + template<typename TreeS, typename VectorOfVectorMortonType> + void task_send_multipole_at_level(int const& level, TreeS& tree, VectorOfVectorMortonType const& morton_to_send) + { + static constexpr auto prio{omp::priorities::max}; + static constexpr int nb_inputs = TreeS::cell_type::storage_type::inputs_size; + using multipole_type = typename TreeS::base_type::cell_type::storage_type::transfer_multipole_type; + + using dependencies_type = typename TreeS::group_of_cell_type::symbolics_type::ptr_multi_dependency_type; + std::vector<dependencies_type> dependencies; + + auto& para = tree.get_parallel_manager(); + auto comm = para.get_communicator(); + const auto nb_proc = para.get_num_processes(); + const auto rank = para.get_process_id(); + + for(auto p = 0; p < nb_proc; ++p) + { + // std::cout << " Morton to send to " << p << " " << morton_to_send[p].size() << std::endl; + // io::print(" morton_to_send[" + std::to_string(p) + "]", morton_to_send[p]); + + if(morton_to_send[p].size() > 0) + { + /// We first construct the in dependencies to ensure that multipoles + /// are updated by the previous pass. 
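The vector filled just below becomes the `in` dependency list of the send task, so the communication only starts once the P2M/M2M tasks that wrote those multipole blocks have completed. A stripped-down sketch of this OpenMP `depend(iterator(...))` pattern; `spawn_send_task` and the `double*` element type are placeholders for the tree's `ptr_multi_dependency_type`:

```cpp
#include <cstddef>
#include <cstdio>
#include <vector>

// `deps` holds one pointer per multipole block the send depends on.
void spawn_send_task(std::vector<double*> deps)
{
    // The iterator modifier turns the whole vector into a list of `in` dependencies,
    // so the task is scheduled only after every producer of those blocks has finished.
#pragma omp task firstprivate(deps) \
    depend(iterator(std::size_t it = 0 : deps.size()), in : (deps[it])[0])
    {
        // The MPI sends of the serialized multipoles would be posted here.
        std::printf("send task runs after %zu producer tasks\n", deps.size());
    }
}
```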
+ parallel::utils::build_multipoles_dependencies_from_morton_vector( + tree.begin_mine_cells(level), tree.end_mine_cells(level), morton_to_send[p], dependencies); + } + } + std::sort(std::begin(dependencies), std::end(dependencies)); + auto last = std::unique(std::begin(dependencies), std::end(dependencies)); + dependencies.erase(last, dependencies.end()); + io::print("M2L-old dependencies(send): ", dependencies); + // // + // // build direct access to the cells whose multipoles are to be sent + // // + // using grp_access_type = std::pair<decltype(tree.begin_cells(level)), int>; + // auto mpi_multipole_value_type = cpp_tools::parallel_manager::mpi::get_datatype<multipole_type>(); + // std::vector<std::vector<grp_access_type>> cells_to_send_access(nb_proc); + // auto begin_grp = tree.begin_mine_cells(level); + // auto end_grp = tree.end_mine_cells(level); + // // std::clog << " build_direct_access_to_components " << std::endl << std::flush; + // scalfmm::parallel::comm::build_direct_access_to_components(nb_proc, begin_grp, end_grp, cells_to_send_access, + // morton_to_send); + + // for(auto p = 0; p < nb_proc; ++p) + // { + // auto& access = cells_to_send_access[p]; + + // std::cout << "task_send_multipole_at_level for process " << p << " size " << access.size() << std::endl + // << std::flush; + // for(auto i = 0; i < access.size(); ++i) + // { + // std::cout << " task_send " << i << " ptr " << std::flush << access[i].first->get() << " index " + // << access[i].second << " morton " << std::flush + // << (*(access[i].first))->component(access[i].second).csymbolics().morton_index + // << " multipoles " + // << (*(access[i].first))->component(access[i].second).transfer_multipoles().at(0) << std::endl + // << std::flush; + // } + // std::cout << "---------------------------" << std::endl << std::flush; + // } + static constexpr std::size_t inputs_size = TreeS::base_type::cell_type::inputs_size; + // + // construct the MPI type to send the all multipoles + // + // tree.get_send_multipole_types(level) = scalfmm::parallel::comm::build_mpi_multipoles_type( + // cells_to_send_access, inputs_size, mpi_multipole_value_type); + + // auto const& multipole_type_to_send = tree.get_send_multipole_types(level); + // tree.print_send_multipole_types(level); + // for(auto p = 0; p < nb_proc; ++p) + // { + // std::cout << " m2l(prep) ptr_data_type(" << p << ") " << &(multipole_type_to_send[p]) << " level: " << level + // << std::endl + // << std::flush; + // } + // + // std::clog << " end build_mpi_multipoles_type " << std::endl << std::flush; + auto ptr_tree = &tree; +// +// task to perform on communications , shared(morton_to_send) +#pragma omp task untied firstprivate(rank, nb_proc, level, dependencies, ptr_tree) \ + depend(iterator(std::size_t it = 0 : dependencies.size()), in : (dependencies[it])[0]) priority(prio) + { + std::vector<cpp_tools::parallel_manager::mpi::request> send_mpi_status; + + std::cout << "m2l-old task(send) " << std::endl << std::flush; + io::print("m2l-old task(send) dependencies(in) ", dependencies); + // parallel::comm::print_all_cells(*ptr_tree, level, "M2L task(send)"); + auto morton_to_send1 = ptr_tree->send_morton_indexes(level); + + for(int p = 0; p < nb_proc; ++p) + { + io::print(" morton_to_send1-old ", morton_to_send1[p]); + } + // + // build direct access to the cells whose multipoles are to be sent + // + using grp_access_type = std::pair<decltype(tree.begin_cells(level)), int>; + auto mpi_multipole_value_type = 
cpp_tools::parallel_manager::mpi::get_datatype<multipole_type>(); + std::vector<std::vector<grp_access_type>> cells_to_send_access(nb_proc); + auto begin_grp = ptr_tree->begin_mine_cells(level); + auto end_grp = ptr_tree->end_mine_cells(level); + // std::clog << " build_direct_access_to_components " << std::endl << std::flush; + scalfmm::parallel::comm::build_direct_access_to_components(nb_proc, begin_grp, end_grp, + cells_to_send_access, morton_to_send1); + // tree.get_send_multipole_types(level) = scalfmm::parallel::comm::build_mpi_multipoles_type( + // cells_to_send_access, inputs_size, mpi_multipole_value_type); + auto multipole_type_to_send = scalfmm::parallel::comm::build_mpi_multipoles_type( + cells_to_send_access, inputs_size, mpi_multipole_value_type); + // auto const& multipole_type_to_send = ptr_tree->get_send_multipole_types(level); + tree.print_send_multipole_types(level); + + for(auto p = 0; p < nb_proc; ++p) + { + if(morton_to_send1[p].size() > 0) + // if(multipole_type_to_send[p] != MPI_DATATYPE_NULL) + { + // print_access(std::cout, cells_to_send_access[p]); + + std::cout << "m2l-old task(send) send to " << p << std::endl << std::flush; + std::cout << " m2l-old (task) ptr_data_type(" << p << ") " << &(multipole_type_to_send[p]) + << " level: " << level << std::endl + << std::flush; + + send_mpi_status.push_back(comm.isend(MPI_BOTTOM, 1, multipole_type_to_send[p], p, 611)); + + std::cout << " m2l(task)-old end send to " << p << "\n" << std::flush; + } + } + + std::cout << " m2l(task)-old end task \n" << std::flush; + + } // end task + } + /// @brief Receive the multipoles and put them in ghost groups + /// + /// @tparam TreeS + /// @tparam VectorOfVectorGroupAccessType + /// @param level level ine tree + /// @param tree the tree + /// @param cells_to_receive_access Cell access vector (ptr on the gout, index inside it) + template<typename TreeS, typename VectorOfVectorGroupAccessType> + void task_receive_multipole_at_level(int const& level, TreeS& tree, + VectorOfVectorGroupAccessType const& cells_to_receive_access) + { + // We first construct the out dependencies (all the ghost groups (right and left)) + // naive version + // + auto& para = tree.get_parallel_manager(); + auto comm = para.get_communicator(); + const auto nb_proc = para.get_num_processes(); + // + auto size_dep{std::distance(tree.begin_cells(level), tree.begin_mine_cells(level)) + + std::distance(tree.end_mine_cells(level), tree.end_cells(level))}; + using dependencies_type = typename TreeS::group_of_cell_type::symbolics_type::ptr_multi_dependency_type; + std::vector<dependencies_type> dependencies(size_dep); + int idx{0}; + for(auto it_grp = tree.begin_cells(level); it_grp != tree.begin_mine_cells(level); ++it_grp, ++idx) + { + dependencies[idx] = &(it_grp->get()->ccomponent(0).cmultipoles(0)); + } + for(auto it_grp = tree.end_mine_cells(level); it_grp != tree.end_cells(level); ++it_grp, ++idx) + { + dependencies[idx] = &(it_grp->get()->ccomponent(0).cmultipoles(0)); + } + io::print("dependencies(recv)-old : ", dependencies); + // + // construct the MPI type to send the all multipoles + // + using multipole_type = typename TreeS::base_type::cell_type::storage_type::transfer_multipole_type; + auto mpi_multipole_value_type = cpp_tools::parallel_manager::mpi::get_datatype<multipole_type>(); + static constexpr std::size_t inputs_size = TreeS::base_type::cell_type::inputs_size; + // auto multipole_type_to_receive = scalfmm::parallel::comm::build_mpi_multipoles_type( + // cells_to_receive_access, inputs_size, 
mpi_multipole_value_type); + tree.get_receive_multipole_types(level) = scalfmm::parallel::comm::build_mpi_multipoles_type( + cells_to_receive_access, inputs_size, mpi_multipole_value_type); + + // auto ptr_multipole_type_to_receive = tree.get_multipole_types(level).data(); + //receive the multipoles + // tree.set_receive_access(to_receive); + static constexpr auto prio{omp::priorities::max}; + auto ptr_tree = &tree; + +#pragma omp task firstprivate(nb_proc, ptr_tree) \ + depend(iterator(std::size_t it = 0 : dependencies.size()), inout : (dependencies[it])[0]) priority(prio) + { + std::cout << "M2L-old task(transfer(receiv)) " << std::endl << std::flush; + io::print("M2L-old transfer_comm(task) dependencies(in): ", dependencies); + + std::vector<cpp_tools::parallel_manager::mpi::request> recept_mpi_status; + auto ptr_multipole_type_to_receive = ptr_tree->get_receive_multipole_types(level).data(); + for(auto p = 0; p < nb_proc; ++p) + { + if(ptr_multipole_type_to_receive[p] != MPI_DATATYPE_NULL) + { + recept_mpi_status.push_back(comm.irecv(MPI_BOTTOM, 1, ptr_multipole_type_to_receive[p], p, 611)); + } + } + if(recept_mpi_status.size() > 0) + { + cpp_tools::parallel_manager::mpi::request::waitall(recept_mpi_status.size(), recept_mpi_status.data()); + { + std::cout << "M2L-old -- level " << level << " -- " << std::endl; + scalfmm::component::for_each_mine_component(tree.begin_cells(level), tree.end_cells(level), + [](auto const& cell) + { + std::cout << "M2L task(end receive) cell index " + << cell.index() << " multipoles " + << cell.transfer_multipoles().at(0) + << " locals " << cell.locals().at(0) + << std::endl + << std::flush; + }); + } + } + std::clog << "&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&\n" << std::flush; + + } // end task + } + + /// @brief generate a task to send/receive the multipoles + /// + /// The received multipoles are put in ghost groups. + /// @tparam TreeS + /// @param level the level in the tree + /// @param tree /the tree containing the source particles + template<typename TreeS, typename TreeT> + void inline task_communications(const int level, TreeS& tree, TreeT& treeT) + { + // std::cout << "task_communications fct start\n "; + static constexpr auto prio{omp::priorities::max - 5}; + static constexpr int nb_inputs = TreeS::cell_type::storage_type::inputs_size; + // + using multipole_type = typename TreeS::base_type::cell_type::storage_type::transfer_multipole_type; + using dependencies_type = typename TreeS::group_of_cell_type::symbolics_type::ptr_multi_dependency_type; + using grp_access_type = std::pair<decltype(tree.begin_cells(level)), int>; + + // + auto& para = tree.get_parallel_manager(); + const auto nb_proc = para.get_num_processes(); + const auto rank = para.get_process_id(); + if(nb_proc == 1) + { + return; + } + // + auto size_dep{std::distance(tree.begin_cells(level), tree.begin_mine_cells(level)) + + std::distance(tree.end_mine_cells(level), tree.end_cells(level))}; + std::vector<dependencies_type> dependencies_in, dependencies_out(size_dep); + + // Build the dependencies in and out + { + /// We first construct the in dependencies to ensure that multipoles + /// are updated by the previous pass. 
+ auto const& morton_to_send = tree.send_morton_indexes(level); + // io::print("m2l(task comm) morton_to_sendl=" + std::to_string(level) + "): ", morton_to_send); + + for(auto p = 0; p < nb_proc; ++p) + { + if(morton_to_send[p].size() > 0) + { + parallel::utils::build_multipoles_dependencies_from_morton_vector( + tree.begin_mine_cells(level), tree.end_mine_cells(level), morton_to_send[p], dependencies_in); + } + } + std::sort(std::begin(dependencies_in), std::end(dependencies_in)); + auto last = std::unique(std::begin(dependencies_in), std::end(dependencies_in)); + dependencies_in.erase(last, dependencies_in.end()); + // + // out dependencies (on all groups of ghosts) + // + int idx{0}; + for(auto it_grp = tree.begin_cells(level); it_grp != tree.begin_mine_cells(level); ++it_grp, ++idx) + { + dependencies_out[idx] = &(it_grp->get()->ccomponent(0).cmultipoles(0)); + } + for(auto it_grp = tree.end_mine_cells(level); it_grp != tree.end_cells(level); ++it_grp, ++idx) + { + dependencies_out[idx] = &(it_grp->get()->ccomponent(0).cmultipoles(0)); + } + // io::print("m2l(task comm) dependencies(transfer)(out): ", dependencies_out); + } + // std::cout << " insert task (comm M2L(level=" + std::to_string(level) + ") \n"; + // io::print(" out: ", dependencies_out); + // io::print(" in: ", dependencies_in); + + auto ptr_tree = &tree; + auto ptr_treeT = &treeT; + // std::cout << " inout: " << ptr_tree << std::endl; //to serialise the communication tasks + + // clang-format off + #pragma omp task untied default(none) firstprivate(ptr_tree, nb_proc, rank, nb_inputs, level) shared(std::cerr, std::clog) \ + depend(iterator(std::size_t it = 0 : dependencies_in.size()), in : (dependencies_in[it])[0]) \ + depend(iterator(std::size_t it = 0 : dependencies_out.size()), out : (dependencies_out[it])[0]) \ + depend(inout: ptr_tree[0], ptr_treeT[0] ) priority(prio) + // clang-format on + { + int receive_section{0}, send_section{0}; + // std::clog << " m2l task(comm(l=" << level << ")) Send part \n" << std::flush; + // #ifndef NO_COMM_TASK + try + { + const int tag_level = 611 + level; + send_section = 1; + auto mpi_multipole_value_type = cpp_tools::parallel_manager::mpi::get_datatype<multipole_type>(); + auto comm = ptr_tree->get_parallel_manager().get_communicator(); + // send part + + auto& morton_to_send = ptr_tree->send_morton_indexes(level); + // construct cells access (send) + // std::vector<std::vector<grp_access_type>> cells_to_send_access(nb_proc); + auto& cells_to_send_access = ptr_tree->send_cells_access(level); + auto begin_grp = ptr_tree->begin_mine_cells(level); + auto end_grp = ptr_tree->end_mine_cells(level); + // std::clog << " m2l task(comm(l=" << level << ")) build direct acces \n" << std::flush; + scalfmm::parallel::comm::build_direct_access_to_components(nb_proc, begin_grp, end_grp, + cells_to_send_access, morton_to_send); + // build type to send all the multipoles + // std::clog << " m2l task(comm(l=" << level << ")) build type \n" << std::flush; + auto multipole_type_to_send = scalfmm::parallel::comm::build_mpi_multipoles_type( + cells_to_send_access, nb_inputs, mpi_multipole_value_type); + // + for(auto p = 0; p < nb_proc; ++p) + { + // io::print("morton_to_send", morton_to_send[p]); + if(morton_to_send[p].size() > 0) + { + // std::clog << "m2l task(send(l=" << level << ")) send to " << p << " tag " << tag_level + // << std::endl + // << std::flush; + // std::cout << "m2l task(send) ptr_data_type(" << p << ") " //<< &(multipole_type_to_send[p]) + // << " level: " << level << std::endl + // << 
std::flush; + // io::print(std::cout, "morton_to_send(" + std::to_string(p) + ")", std::begin(morton_to_send[p]), + // std::end(morton_to_send[p])); + // std::cout << std::flush << std::endl; + // #ifdef COMM_SEND + comm.isend(MPI_BOTTOM, 1, multipole_type_to_send[p], p, tag_level); + // #endif + } + } + // std::clog << " m2l task(comm(l=" << level << ")) send end \n"; + /// end send part + /////////////////////////////////////////////////////// + receive_section = 1; + { + // std::clog << " m2l task(comm(l=" << level << ")) Receive part level " << level << std::endl; + // receive part + auto& cells_to_receive_access = ptr_tree->receive_cells_access(level); + // for(auto p = 0; p < nb_proc; ++p) + // { + // auto& access = cells_to_receive_access.at(p); + // if(access.size()) + // { + // std::cout << " cells_to_receive_access " << p << " size " << access.size() << std::endl + // << std::flush; + // for(auto i = 0; i < access.size(); ++i) + // { + // std::cout << i << " ptr " << access[i].first->get() << " index " << access[i].second + // << " morton " + // << (*(access[i].first))->component(access[i].second).csymbolics().morton_index + // << std::endl; + // } + // } + // } + // + auto type = scalfmm::parallel::comm::build_mpi_multipoles_type(cells_to_receive_access, nb_inputs, + mpi_multipole_value_type); + + auto ptr_multipole_type_to_receive = ptr_tree->get_receive_multipole_types(level); + + // post receive + std::vector<cpp_tools::parallel_manager::mpi::request> recept_mpi_status; + for(auto p = 0; p < nb_proc; ++p) + { + if(cells_to_receive_access.at(p).size() != 0) + // if(ptr_multipole_type_to_receive[p] != MPI_DATATYPE_NULL) + { + // std::clog << "m2l task(comm(l=" << level << ")) post ireceive from " << p << " tag " + // << tag_level << std::endl + // << std::flush; + recept_mpi_status.push_back(comm.irecv(MPI_BOTTOM, 1, type[p], p, tag_level)); + } + } + + if(recept_mpi_status.size() > 0) + { +#ifndef SCALFMM_USE_WAIT_ANY + int cpt{0}; + while(cpt != recept_mpi_status.size()) + { + int count{1}; + int index{-1}; + MPI_Status status; + MPI_Waitany(int(recept_mpi_status.size()), + reinterpret_cast<MPI_Request*>(recept_mpi_status.data()), &index, &status); + // index = cpp_tools::parallel_manager::mpi::request::waitany(recept_mpi_status.size()), recept_mpi_status.data(),status); + ++cpt; + // std::clog << "receive one comm from " << status.MPI_SOURCE << " tag " << status.MPI_TAG + // << " wait for " << recept_mpi_status.size() - cpt << std::endl; + + if(status.MPI_TAG != tag_level) + { + std::cerr << " wrong tag wait for " << tag_level << " received " << status.MPI_TAG + << std::endl; + --cpt; + } + } +#else + // std::clog << " m2l task(comm(l=" << level << ")) wait all level " << level << " \n" + // << std::flush; + cpp_tools::parallel_manager::mpi::request::waitall(recept_mpi_status.size(), + recept_mpi_status.data()); + +#endif + // std::cout << " m2l task(comm) wait all level " << level << " \n" << std::flush; + // std::clog << " m2l task(comm) wait all level " << level << " \n" << std::flush; + + // { + // std::cout << "M2L -- level " << level << " -- " << std::endl; + // scalfmm::component::for_each_mine_component( + // ptr_tree->begin_cells(level), ptr_tree->end_cells(level), + // [](auto const& cell) + // { + // std::cout << "M2L task(end receive) cell index " << cell.index() << " multipoles " + // << cell.transfer_multipoles().at(0) << " locals " << cell.locals().at(0) + // << std::endl + // << std::flush; + // }); + // } + } + // std::clog << "m2l task(comm(l=" << level << 
")) end receive \n"; + } + } + catch(std::exception& e) + { + std::cerr << " error in task_communication !!!!!!!!!\n"; + std::cerr << " m2l task(comm) crash all level " << level << " \n" << std::flush; + if(receive_section > 0) + { + std::cerr << "Bug in receive section \n" << std::flush; + } + else + { + std::cerr << "Bug in send section \n" << std::flush; + } + std::cerr << e.what() << '\n' << std::flush; + std::exit(EXIT_FAILURE); + } + // #endif + // std::clog << "m2l task(comm(l=" << level << ")) end\n"; + } // end task + // std::cout << "task_communications fct end\n "; + } /** * @brief Perform the communications between the tree_source and the tree_target for the current level @@ -36,7 +626,6 @@ namespace scalfmm::algorithms::mpi::pass * * @tparam TreeS * @tparam TreeT - * * @param level level in the tree * @param tree_source source tree (contains the multipoles) * @param tree_target target tree @@ -44,16 +633,12 @@ namespace scalfmm::algorithms::mpi::pass template<typename TreeS, typename TreeT> inline auto start_communications(const int& level, TreeS& tree_source, TreeT& tree_target) -> void { - using mortonIdx_type = std::int64_t; // typename TreeT::group_of_cell_type::symbolics_type::morton_type; - static constexpr int nb_inputs = TreeS::base_type::cell_type::storage_type::inputs_size; - static constexpr int dimension = TreeS::base_type::box_type::dimension; + // std::cout << "start_communications fct start at level " << level << "\n "; - using value_type_ori = typename TreeS::base_type::cell_type::storage_type::transfer_multipole_type; - using value_type1 = value_type_ori; - using value_type = std::conditional_t<meta::is_complex_v<value_type_ori>, - meta::has_value_type_t<value_type_ori>, value_type_ori>; + using mortonIdx_type = std::int64_t; // typename TreeT::group_of_cell_type::symbolics_type::morton_type; + // static constexpr int nb_inputs = TreeS::base_type::cell_type::storage_type::inputs_size; + // static constexpr int dimension = TreeS::base_type::box_type::dimension; - int nb_values = meta::is_complex_v<value_type_ori> ? 2 : 1; auto& para = tree_target.get_parallel_manager(); auto comm = para.get_communicator(); @@ -69,7 +654,7 @@ namespace scalfmm::algorithms::mpi::pass /////////////////////////////////////////////////////////////////////////////////// /// STEP 1 /////////////////////////////////////////////////////////////////////////////////// - /// Determines the Morton index vector to be received from processor p? In addition, for each Morton index we + /// Determines the Morton index vector to be received from processor p. In addition, for each Morton index we /// store the cell, i.e. a pointer to its group and the index within the group (group_ptr, index). This will /// enable us to insert the multipoles received from processor p directly into the cell. 
/// @@ -84,28 +669,52 @@ namespace scalfmm::algorithms::mpi::pass using grp_access_type = std::pair<decltype(tree_target.begin_cells(level)), int>; std::vector<std::vector<grp_access_type>> to_receive(nb_proc); std::vector<std::vector<mortonIdx_type>> morton_to_receive(nb_proc); // TOREMOVE - // #ifdef SPLIT_COMM + bool verbose = false; + { - auto begin_left_ghost = tree_target.begin_cells(level); + auto begin_left_ghost = tree_source.begin_cells(level); - auto end_left_ghost = tree_target.begin_mine_cells(level); - auto begin_right_ghost = tree_target.end_mine_cells(level); - auto end_right_ghost = tree_target.end_cells(level); + auto end_left_ghost = tree_source.begin_mine_cells(level); + auto begin_right_ghost = tree_source.end_mine_cells(level); + auto end_right_ghost = tree_source.end_cells(level); auto const& distrib = tree_source.get_cell_distribution(level); // + // std::clog << " step 1 M2L" << std::endl << std::flush; + scalfmm::parallel::comm::start_step1(comm, begin_left_ghost, end_left_ghost, begin_right_ghost, end_right_ghost, distrib, nb_messages_to_receive, nb_messages_to_send, - to_receive, morton_to_receive); + to_receive, morton_to_receive, verbose); + // std::cout << " Value after step 1 M2L" << std::endl << std::flush; + // for(auto p = 0; p < nb_proc; ++p) + // { + // auto& access = to_receive[p]; + + // std::cout << " to_receive " << p << " size " << access.size() << std::endl << std::flush; + // for(auto i = 0; i < access.size(); ++i) + // { + // std::cout << i << " ptr " << access[i].first->get() << " index " << access[i].second + // << " morton " + // << (*(access[i].first))->component(access[i].second).csymbolics().morton_index + // << std::endl; + // } + // } + tree_source.receive_cells_access(level) = to_receive; } + // for(auto p = 0; p < nb_proc; ++p) + // { + // io::print(" after_step1 morton to receive[" + std::to_string(p) + "] ", morton_to_receive[p]); + // } + // io::print(" after_step1 nb_messages_to_receive ", nb_messages_to_receive); + // io::print(" after_step1 nb_messages_to_send ", nb_messages_to_send); /////////////////////////////////////////////////////////////////////////////////// /// STEP 2 /// // We can now exchange the morton indices // Morton's list of indices to send their multipole to proc p - std::vector<std::vector<mortonIdx_type>> morton_to_send(nb_proc); - std::vector<cpp_tools::parallel_manager::mpi::request> tab_mpi_status; - + // std::vector<std::vector<mortonIdx_type>> morton_to_send(nb_proc); + // std::clog << " step 2 M2L" << std::endl << std::flush; + auto& morton_to_send = tree_source.send_morton_indexes(level); scalfmm::parallel::comm::start_step2(nb_proc, rank, comm, nb_messages_to_receive, nb_messages_to_send, morton_to_receive, morton_to_send); @@ -126,207 +735,137 @@ namespace scalfmm::algorithms::mpi::pass /// multipoles we're going to put in our ghost cells. ///////////////////////////////////////////////////////////////////////////////// // type of dependence - using dep_type = typename TreeS::group_of_cell_type::symbolics_type::ptr_multi_dependency_type; - - auto mpi_multipole_type = cpp_tools::parallel_manager::mpi::get_datatype<value_type>(); - // std::cout << "\n Start step 3\n\n"; - auto nb_cells{morton_to_send[0].size()}; - for(auto const& p: morton_to_send) { - nb_cells = std::max(nb_cells, p.size()); - } - int order = tree_source.order(); - // nb_values = 2 if complex type otherwise 1; - // math::pow(order, dimension) only works with interpolation not generic !!!! 
- int size_mult{int(nb_inputs * math::pow(order, dimension)) * nb_values}; // WRONG !!!!!! +#ifndef __SPLIT_SEND_RECEIV__ + task_communications(level, tree_source, tree_target); // works +#else +#warning( "segfault in this section ") - // allocate the buffer to store the multipoles - std::vector<std::vector<value_type_ori>> buffer(nb_proc); - { - // method to construct the buffer of multipoles to send - auto build_buffer = [](auto first_group_ghost, auto last_group_ghost, - std::vector<mortonIdx_type> const& index_to_send, - std::vector<value_type_ori>& buffer) - { - try - { - int idx{0}; - int max_idx = index_to_send.size(); // loop on the groups - auto it = std::begin(buffer); - for(auto grp_ptr = first_group_ghost; grp_ptr != last_group_ghost; ++grp_ptr) - { - int start_grp{0}; - - auto const& csymb = (*grp_ptr)->csymbolics(); - // iterate on the cells - while(idx < max_idx and - math::between(index_to_send[idx], csymb.starting_index, csymb.ending_index)) - { // find cell inside the group - int pos{-1}; - for(int i = start_grp; i < (*grp_ptr)->size(); ++i) - { - auto morton = (*grp_ptr)->component(i).csymbolics().morton_index; - if(index_to_send[idx] == morton) - { - pos = i; - start_grp = i + 1; - // std::cout << " pos = " << pos << std::endl; - break; - } - } - auto const& cell = (*grp_ptr)->component(pos); - auto const& m = cell.transfer_multipoles(); - auto nb_m = m.size(); - for(std::size_t i{0}; i < nb_m; ++i) - { - auto const& ten = m.at(i); - std::copy(std::begin(ten), std::end(ten), it); - it += ten.size(); - } - ++idx; - } - } - } - catch(std::out_of_range& e) - { - std::cout << " error in buffer building !!!!!!!!!\n"; - std::cout << e.what() << '\n' << std::flush; - } - }; + std::clog << " step 3 M2L task_send_multipole_at_level" << std::endl << std::flush; - // vector of dependencies on the group for each MPI process - std::vector<std::vector<dep_type>> deps_send(nb_proc); + // Construct a task to send the multipole + { + // loop on the processors to construct the buffer and to send it + task_send_multipole_at_level(level, tree_source, morton_to_send); + } - // loop on the processors to construct the buffer and to send it - for(auto p = 0; p < nb_proc; ++p) + // Construct a task to receive the multipole { - /// check if I have something to send - if(p != rank and morton_to_send[p].size() > 0) - { -#ifdef TRANSFERT_COMM_TASKS - /// We first construct the in dependencies to ensure that multipoles - /// are updated by the previous pass. 
- parallel::utils::build_dependencies_from_morton_vector(tree_source.begin_mine_cells(level), - tree_source.end_mine_cells(level), - morton_to_send[p], deps_send[p]); - /// spawn a task for sending communication to process p - -#pragma omp task shared(tree_source, morton_to_send, buffer, rank, nb_proc, deps_send, nb_messages_to_receive, \ - size_mult, mpi_multipole_type) firstprivate(p, level) \ - depend(iterator(std::size_t it = 0 : deps_send[p].size()), in : ((deps_send[p])[it])[0]) priority(prio) -#endif - { - buffer[p].resize(morton_to_send[p].size() * size_mult); - build_buffer(tree_source.begin_mine_cells(level), tree_source.end_mine_cells(level), - morton_to_send[p], buffer[p]); + std::cout << " task_receive_multipole_at_level start " << level << std::endl << std::flush; - // send buffer to processor p - comm.isend(reinterpret_cast<value_type*>(buffer[p].data()), buffer[p].size(), - mpi_multipole_type, p, 611); - } - } - } - } - // Reception of the multipoles - { - // Reset the array of requests used in step 2 - tab_mpi_status.clear(); - // We first construct the out dependencies (all the ghost groups) -#ifdef TRANSFERT_COMM_TASKS - - // compute the dependencies - auto size_dep{std::distance(tree_source.begin_cells(level), tree_source.begin_mine_cells(level)) + - std::distance(tree_source.end_mine_cells(level), tree_source.end_cells(level))}; - std::vector<dep_type> deps_recv(size_dep); - { // Find all dependencies - naive version - int idx{0}; - for(auto it_grp = tree_source.begin_cells(level); it_grp != tree_source.begin_mine_cells(level); - ++it_grp, ++idx) - { - deps_recv[idx] = &(it_grp->get()->ccomponent(0).cmultipoles(0)); - } - for(auto it_grp = tree_source.end_mine_cells(level); it_grp != tree_source.end_cells(level); - ++it_grp, ++idx) - { - deps_recv[idx] = &(it_grp->get()->ccomponent(0).cmultipoles(0)); - } + task_receive_multipole_at_level(level, tree_source, to_receive); } - -// post the task on the reception -#pragma omp task shared(rank, nb_proc, nb_messages_to_receive, size_mult, mpi_multipole_type) \ - depend(iterator(std::size_t it = 0 : deps_recv.size()), out : (deps_recv[it])[0]) priority(prio) #endif - { - // post the receives - int cc{0}; - std::vector<cpp_tools::parallel_manager::mpi::request> recept_mpi_status; - std::vector<std::vector<value_type_ori>> buffer_rep(nb_proc); + } // end step3 + // std::cout << "start_communications fct end at level " << level << "\n "; - for(auto p = 0; p < nb_proc; ++p) - { - if(p != rank and nb_messages_to_receive[p] != 0) - { - buffer_rep[p].resize(nb_messages_to_receive[p] * size_mult); + } // end function start_communications - recept_mpi_status.push_back( - comm.irecv(buffer_rep[p].data(), buffer_rep[p].size(), mpi_multipole_type, p, 611)); - ++cc; - } - } + template<typename TreeS, typename TreeT> + inline auto prepare_comm_transfert(const int& level, TreeS& tree_source, TreeT& tree_target) -> void + { + // std::cout << "start_communications fct start at level " << level << "\n "; - // wait we receive all the communication + using mortonIdx_type = + typename TreeS::morton_type; // typename TreeT::group_of_cell_type::symbolics_type::morton_type; + // static constexpr int nb_inputs = TreeS::base_type::cell_type::storage_type::inputs_size; + // static constexpr int dimension = TreeS::base_type::box_type::dimension; - if(recept_mpi_status.size() > 0) - { - cpp_tools::parallel_manager::mpi::request::waitall(recept_mpi_status.size(), - recept_mpi_status.data()); - } + auto& para = tree_source.get_parallel_manager(); + auto 
comm = para.get_communicator(); - // put the multipoles inside the ghosts - for(auto p = 0; p < nb_proc; ++p) - { - if(p != rank and to_receive[p].size() > 0) - { - auto const& buffer = buffer_rep[p]; - // ONLY WORKS IF SOURCE == TARGET - auto const& pairs = to_receive[p]; - auto it = std::begin(buffer); + auto rank = para.get_process_id(); + auto nb_proc = para.get_num_processes(); + if(nb_proc == 1) + { // Openmp case -> no communication + return; + } + std::vector<int> nb_messages_to_send(nb_proc, 0); + std::vector<int> nb_messages_to_receive(nb_proc, 0); - for(auto i = 0; i < int(pairs.size()); ++i) - { - auto& cell = pairs[i].first->get()->component(pairs[i].second); - auto& m = cell.transfer_multipoles(); - auto nb_m = m.size(); + /////////////////////////////////////////////////////////////////////////////////// + /// STEP 1 + /////////////////////////////////////////////////////////////////////////////////// + /// Determines the Morton index vector to be received from processor p. In addition, for each Morton index we + /// store the cell, i.e. a pointer to its group and the index within the group (group_ptr, index). This will + /// enable us to insert the multipoles received from processor p directly into the cell. + /// + /// to_receive: a vector of vector of pair (the iterator on the group ,the position of the cell in the group) + /// to_receive[p] is the position vector of cells in groups whose Morton index comes from processor p + /// to_receive[p][i] a pair (the iterator on the group ,the position of the cell in the group) + /// vector of size nb_proc + /// - nb_messages_to_receive: the number of morton indices to exchange with processor p + /// - nb_messages_to_send: the number of morton indices to send tp processor p + /// - morton_to_recv: the morton indices to exchange with processor p + ///////////////////////////////////////////////////////////////////////////////// + using grp_access_type = std::pair<decltype(tree_target.begin_cells(level)), int>; + std::vector<std::vector<grp_access_type>> to_receive(nb_proc); + std::vector<std::vector<mortonIdx_type>> morton_to_receive(nb_proc); // TOREMOVE + bool verbose = false; + + { + auto begin_left_ghost = tree_source.begin_cells(level); + + auto end_left_ghost = tree_source.begin_mine_cells(level); + auto begin_right_ghost = tree_source.end_mine_cells(level); + auto end_right_ghost = tree_source.end_cells(level); + auto const& distrib = tree_source.get_cell_distribution(level); + // + // std::clog << " step 1 M2L" << std::endl << std::flush; + + scalfmm::parallel::comm::start_step1(comm, begin_left_ghost, end_left_ghost, begin_right_ghost, + end_right_ghost, distrib, nb_messages_to_receive, nb_messages_to_send, + to_receive, morton_to_receive, verbose); + // std::cout << " Value after step 1 M2L" << std::endl << std::flush; + // for(auto p = 0; p < nb_proc; ++p) + // { + // auto& access = to_receive[p]; + + // std::cout << " to_receive " << p << " size " << access.size() << std::endl << std::flush; + // for(auto i = 0; i < access.size(); ++i) + // { + // std::cout << i << " ptr " << access[i].first->get() << " index " << access[i].second + // << " morton " + // << (*(access[i].first))->component(access[i].second).csymbolics().morton_index + // << std::endl; + // } + // } + tree_source.receive_cells_access(level) = to_receive; + } + // for(auto p = 0; p < nb_proc; ++p) + // { + // io::print(" after_step1 morton to receive[" + std::to_string(p) + "] ", morton_to_receive[p]); + // } + // io::print(" after_step1 
nb_messages_to_receive ", nb_messages_to_receive); + // io::print(" after_step1 nb_messages_to_send ", nb_messages_to_send); + + /////////////////////////////////////////////////////////////////////////////////// + /// STEP 2 + /// + // We can now exchange the morton indices + // Morton's list of indices to send their multipole to proc p + // std::vector<std::vector<mortonIdx_type>> morton_to_send(nb_proc); + // std::clog << " step 2 M2L" << std::endl << std::flush; + auto& morton_to_send = tree_source.send_morton_indexes(level); + scalfmm::parallel::comm::start_step2(nb_proc, rank, comm, nb_messages_to_receive, nb_messages_to_send, + morton_to_receive, morton_to_send); + } - for(std::size_t i{0}; i < nb_m; ++i) - { - auto& ten = m.at(i); - std::copy(it, it + ten.size(), std::begin(ten)); - it += ten.size(); - } - } - } - } - } // end task - } // end step3 - para.get_communicator().barrier(); - } // end function start_communications /////////////////////////////////////////////////////////////////////////////////// - /** - * @brief apply the transfer operator to construct the local approximation in tree_target - * - * @tparam TreeS template for the Tree source type - * @tparam TreeT template for the Tree target type - * @tparam FarField template for the far field type - * @tparam BufferPtr template for the type of pointer of the buffer - * - * @param tree_source the tree containing the source cells/leaves - * @param tree_target the tree containing the target cells/leaves - * @param far_field The far field operator - * @param buffers vector of buffers used by the far_field in the transfer pass (if needed) - * @param split the enum (@see split_m2l) tp specify on which level we apply the transfer operator - */ + /// @brief apply the transfer operator to construct the local approximation in tree_target + /// + /// @tparam TreeS template for the Tree source type + /// @tparam TreeT template for the Tree target type + /// @tparam FarField template for the far field type + /// @tparam BufferPtr template for the type of pointer of the buffer + /// @param tree_source the tree containing the source cells/leaves + /// @param tree_target the tree containing the target cells/leaves + /// @param far_field The far field operator + /// @param buffers vector of buffers used by the far_field in the transfer pass (if needed) + /// @param split the enum (@see split_m2l) tp specify on which level we apply the transfer + /// operator + /// template<typename TreeS, typename TreeT, typename FarField, typename BufferPtr> inline auto transfer(TreeS& tree_source, TreeT& tree_target, FarField const& far_field, std::vector<BufferPtr> const& buffers, @@ -352,11 +891,23 @@ namespace scalfmm::algorithms::mpi::pass case omp::pass::split_m2l::full: break; } + // std::cout << "&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&" << std::endl; + + // std::cout << " loop start at top_height " << top_height << " and end at last_level " << last_level << std::endl + // << std::flush; for(std::size_t level = top_height; level < last_level; ++level) { + // std::cout << "transfer : " << level << std::endl << std::flush; start_communications(level, tree_source, tree_target); + // std::cout << " end comm " << level << std::endl << std::flush; + // #pragma omp taskwait + // std::cout << "&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&" << std::endl; + omp::pass::transfer_level(level, tree_source, tree_target, far_field, buffers); + // std::cout << 
"&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&" << std::endl; } + // std::cout << "end transfer pass" << std::endl << std::flush; + // std::cout << "&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&" << std::endl; } } // namespace scalfmm::algorithms::mpi::pass diff --git a/include/scalfmm/algorithms/mpi/upward.hpp b/include/scalfmm/algorithms/mpi/upward.hpp index 463e5814c85a2db8e0faba004aae68422bd08a17..53d435ce963d15b9c456ef89f1678cb2f0e1c16e 100644 --- a/include/scalfmm/algorithms/mpi/upward.hpp +++ b/include/scalfmm/algorithms/mpi/upward.hpp @@ -1,26 +1,65 @@ // -------------------------------- // See LICENCE file at project root -// File : scalfmm/algorithms/mpi/upward.hpp +// File : algorithm/omp/upward.hpp // -------------------------------- #ifndef SCALFMM_ALGORITHMS_MPI_UPWARD_HPP #define SCALFMM_ALGORITHMS_MPI_UPWARD_HPP #ifdef _OPENMP +#include <omp.h> + #include "scalfmm/algorithms/omp/upward.hpp" +// #include "scalfmm/operators/m2m.hpp" +// #include "scalfmm/operators/tags.hpp" +// #include "scalfmm/tree/utils.hpp" +// #include "scalfmm/utils/massert.hpp" +// #include "scalfmm/utils/math.hpp" #include <cpp_tools/parallel_manager/parallel_manager.hpp> -#include <omp.h> - namespace scalfmm::algorithms::mpi::pass { + /// @brief Construct the vector of dependencies (child group) + /// @tparam IteratorType + /// @tparam MortonType + /// @tparam Dependencies_t + /// @tparam dimension + /// @param begin first iterator on the groups of cells (child) + /// @param end last iterator on the groups of cells (child) + /// @param parent_morton_index the parent index + /// @param dependencies the vector of dependencies + template<int dimension, typename IteratorType, typename MortonType, typename Dependencies_t> + void build_dependencies(IteratorType begin, IteratorType end, MortonType const& parent_morton_index, + Dependencies_t& dependencies) + { + for(auto grp_ptr = begin; grp_ptr != end; ++grp_ptr) + { + auto const& csymb = (*grp_ptr)->csymbolics(); + // iterate on the cells in the same group + // we move forward in the index vector + if(parent_morton_index == (csymb.starting_index >> dimension)) + { + // std::cout << " add depend for grp with Int [" << csymb.starting_index << ", " << csymb.ending_index + // << "]" << std::endl; + dependencies.push_back(&(grp_ptr->get()->ccomponent(0).cmultipoles(0))); + } + else + { + break; + } + } + } /** - * @brief Perform the communications for the children level + * @brief Perform the communications for the children level * + * Check if the parent of my first Morton index at child level is own by the + * previous process. 
if yes, we send the multipoles + * * @tparam Tree - * * @tparam Approximation + * @input[in] level the level to build the multipoles + * @input[inout] tree to update the multipoles */ template<typename Tree> inline auto start_communications(const int& level, Tree& tree) -> void @@ -36,80 +75,103 @@ namespace scalfmm::algorithms::mpi::pass static constexpr auto prio{omp::priorities::max}; // auto& para = tree.get_parallel_manager(); - auto comm = para.get_communicator(); + auto& comm = para.get_communicator(); + auto ptr_comm = &comm; auto rank = comm.rank(); int nb_proc = comm.size(); int tag_nb = 1200 + 10 * level; int tag_data = 1201 + 10 * level; auto mpi_int_type = cpp_tools::parallel_manager::mpi::get_datatype<int>(); - + // level where the multipoles are known auto level_child = level + 1; + // iterator on the child cell groups + auto it_first_group_of_child = tree.begin_mine_cells(level_child); // get size of multipoles - auto it_group1 = tree.begin_mine_cells(level_child); - auto it_cell1 = it_group1->get()->begin(); - auto const& m1 = it_cell1->cmultipoles(); - int size{int(nb_inputs * m1.at(0).size()) * nb_children}; + auto it_first_cell_child = it_first_group_of_child->get()->begin(); + auto const& m1 = it_first_cell_child->cmultipoles(); + int size{int(nb_inputs * m1.at(0).size()) * nb_children}; //only nb_children -1 is needed in the worse case + + std::vector<dep_type> dependencies_out, dependencies_in; - std::vector<dep_type> deps; + auto const& distrib = tree.get_cell_distribution(level); + auto const& distrib_child = tree.get_cell_distribution(level_child); + bool send_data{false}, receive_data{false}; + + auto ptr_tree = &tree; if(rank > 0) { // If we send the multipoles, they must have been updated by the M2M of the previous level! - - int count{0}; + // Check if you have to send something on the left (rank-1) + // We are at level l and the children are at level l+1. + // The ghost at the child level(on the right) + // must be updated if the parent index of the first child index is not equal to(lower than) + // the first index at level l. // serialization - std::vector<value_type> buffer(size); - // check if we have to send some children - auto first_group_of_child = tree.begin_mine_cells(level_child)->get(); - auto last_index_child = first_group_of_child->component(0).index(); + // check if we have some child to send (if there exists they starts at the first group) + + // first_index_child = morton index of the first child cell (at level = level_child) + auto first_index_child = distrib_child[rank][0]; + // index_parent = first index of the cell at current level on my process + auto index_parent = distrib[rank][0]; + // parent_morton_index = last index of the cell on previous process at current level + auto previous_parent_morton_index = distrib[rank - 1][1] - 1; + // first_index_child = morton index of the first child cell (at level = level_child) + auto last_index_child_previous = distrib_child[rank - 1][1] - 1; // - auto first_group = tree.begin_mine_cells(level)->get(); - auto index = first_group->component(0).index(); - // Check if I have the parent - - // Should be in a task ! 
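The send decision hinges on one property of Morton codes: the parent index is obtained by dropping `dimension` bits, i.e. `child >> dimension`. A small numeric illustration of that test, with invented cell distributions:

```cpp
// Parent of a Morton index = index >> dimension (one bit per axis is dropped).
// Toy illustration of the send test used above; the distributions are invented.
#include <array>
#include <cstdint>
#include <iostream>

int main()
{
    constexpr int dimension = 3;                    // octree: 8 children per cell
    using interval = std::array<std::int64_t, 2>;   // [first, last) Morton interval

    // invented cell distributions for 2 processes, at level l and level l+1
    std::array<interval, 2> distrib{{{0, 6}, {6, 11}}};          // level l
    std::array<interval, 2> distrib_child{{{0, 42}, {42, 83}}};  // level l+1
    int rank = 1;

    auto first_index_child = distrib_child[rank][0];        // 42
    auto previous_parent = distrib[rank - 1][1] - 1;         // 5, owned by rank 0
    bool send_data = (first_index_child >> dimension) == previous_parent;

    std::cout << "parent of " << first_index_child << " is " << (first_index_child >> dimension)
              << (send_data ? " -> owned by rank 0, send the children multipoles\n"
                            : " -> owned locally, nothing to send\n");
}
```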
- auto it_group = tree.begin_mine_cells(level_child); - auto gs = it_group->get()->size(); - int nb_grp_dep = std::min(static_cast<int>(nb_children / gs + 1), - static_cast<int>(std::distance(it_group, tree.end_mine_cells(level_child)))); - auto it_grp = it_group; -#ifdef M2M_COMM_TASKS - for(int i = 0; i < nb_grp_dep; ++i, ++it_grp) - { - deps.push_back(&(it_grp->get()->ccomponent(0).cmultipoles(0))); - } -#pragma omp task shared(rank, mpi_int_type, size, tag_data, tag_nb, dimension) firstprivate(it_group) \ - depend(iterator(std::size_t it = 0 : deps.size()), out : (deps[it])[0]) priority(prio) -#endif + send_data = (first_index_child >> dimension) == previous_parent_morton_index; + if(send_data) { - if(index > last_index_child >> dimension) + // std::cout << "upward send comm first_index_child " << first_index_child << " parent " + // << (first_index_child >> dimension) << " previous child " << last_index_child_previous + // << " its parent " << previous_parent_morton_index << std::endl + // << std::flush; + // construct the dependencies on the group of multipoles + build_dependencies<dimension>(it_first_group_of_child, tree.end_mine_cells(level_child), + previous_parent_morton_index, dependencies_in); +// io::print("upward(send) dependencies(in) ", dependencies_in); +// std::cout << std::flush; +// task to send the multipoles +#pragma omp task default(none) shared(std::cout, std::clog) \ + firstprivate(ptr_comm, rank, level, tag_data, tag_nb, mpi_int_type, size, dimension, nb_children, \ + previous_parent_morton_index, first_index_child, it_first_group_of_child) \ + depend(iterator(std::size_t it = 0 : dependencies_in.size()), in : (dependencies_in[it])[0]), \ + depend(inout : ptr_tree[0]) priority(prio) { + // std::clog << "upward(task send(" << level << ")) start \n"; + std::vector<value_type> buffer(size); + int count{0}; + + // std::clog << "upward(task send(" << level << ")) index " << first_index_child + // << " parent_of_first_index_child " << previous_parent_morton_index << std::endl + // << std::flush; // index is now the parent of the first child - index = last_index_child >> dimension; + auto index = first_index_child >> dimension; // I have to send // find the number of children to send (get pointer on multipoles !!) 
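The task above relies on the OpenMP 5.0 `iterator` modifier in `depend` clauses to declare a dependence on a runtime-sized list of addresses. A self-contained sketch of that mechanism alone, detached from the tree types:

```cpp
// OpenMP 5.0 iterator modifier in depend clauses: one consumer task depends
// on a runtime-sized list of producer blocks. Compile with e.g. -fopenmp.
#include <cstdio>
#include <vector>

int main()
{
    constexpr int nb_blocks = 4;
    std::vector<std::vector<double>> blocks(nb_blocks, std::vector<double>(8, 0.0));
    // one dependency address per block (its first element), as done for the multipole groups
    std::vector<double*> deps(nb_blocks);
    for(int i = 0; i < nb_blocks; ++i) { deps[i] = blocks[i].data(); }

#pragma omp parallel
#pragma omp single
    {
        for(int i = 0; i < nb_blocks; ++i)
        {
#pragma omp task firstprivate(i) shared(blocks, deps) depend(out : (deps[i])[0])
            {
                for(auto& x: blocks[i]) { x = i + 1.0; }   // "fill the multipoles"
            }
        }
        // consumer: waits for every producer thanks to the iterator depend clause
#pragma omp task shared(blocks, deps) depend(iterator(std::size_t it = 0 : deps.size()), in : (deps[it])[0])
        {
            double sum = 0.0;
            for(auto const& b: blocks) { sum += b[0]; }
            std::printf("sum of first entries: %g\n", sum);   // 1+2+3+4 = 10
        }
    }
}
```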
// Construct an MPI datatype // serialization - - // auto it_group = tree.begin_mine_cells(level_child); - auto it_cell = it_group->get()->begin(); + auto it_group = it_first_group_of_child; + // + auto it_cell = it_first_group_of_child->get()->begin(); auto next = scalfmm::component::generate_linear_iterator(1, it_group, it_cell); - // We construct an MPI DATA_TYPE - // MPI_Datatype mult_data_type; auto it = std::begin(buffer); + // + // compute the number of cells to send and copy the multipoles in the buffer for(int i = 0; i < nb_children - 1; ++i, next()) { - if(index < (it_cell->index() >> dimension)) + if(previous_parent_morton_index < (it_cell->index() >> dimension)) { break; } + // std::clog << "upward(task send) Check children P " << index << " C " << it_cell->index() + // << " level " << it_cell->csymbolics().level << std::endl + // << std::flush; // copy the multipoles in the buffer auto const& m = it_cell->cmultipoles(); - auto nb_m = m.size(); - for(std::size_t i{0}; i < nb_m; ++i) { auto const& ten = m.at(i); @@ -118,88 +180,249 @@ namespace scalfmm::algorithms::mpi::pass } ++count; } - } - comm.isend(&count, 1, mpi_int_type, rank - 1, tag_nb); + // std::clog << "upward(task send) nb_send = " << count << std::endl; - if(count != 0) - { - // loop to serialize the multipoles - auto mpi_type = cpp_tools::parallel_manager::mpi::get_datatype<value_type>(); + ptr_comm->isend(&count, 1, mpi_int_type, rank - 1, tag_nb); - comm.isend(buffer.data(), size, mpi_type, rank - 1, tag_data); - } + { + // loop to serialize the multipoles + auto mpi_type = cpp_tools::parallel_manager::mpi::get_datatype<value_type>(); + + ptr_comm->isend(buffer.data(), size, mpi_type, rank - 1, tag_data); + // std::cout << "upward(task send) buffer:"; + // for(int i = 0; i < buffer.size(); ++i) + // { + // std::cout << " " << buffer[i]; + // } + // std::cout << " \n" << std::flush; + } + // std::clog << "upward(task send(" << level << ")) end \n"; + } // end task } + // else + // { + // std::cout << "upward no send comm first_index_child " << first_index_child << " previous child " + // << last_index_child_previous << std::endl + // << std::flush; + // } } - if(rank == nb_proc - 1) + + if(rank < nb_proc - 1) { - return; + // last_index_child = morton index of the last child cell (at level = level_child) + auto last_index_child = distrib_child[rank][1] - 1; + // parent_morton_index = last index of the cell on previous process at current level + auto parent_morton_index = distrib[rank][1] - 1; + // first_index_child_next = morton index of the first child cell (at level = level_child) on next mpi process + auto first_index_child_next = distrib_child[rank + 1][0]; + receive_data = (last_index_child >> dimension) == (first_index_child_next >> dimension); + if(receive_data) + { + // std::cout << "upward receive comm last_index_child " << last_index_child << " parent " + // << (last_index_child >> dimension) << " next child " << first_index_child_next + // << " its parent " << (first_index_child_next >> dimension) << std::endl + // << std::flush; + // dependencies_out contains the ghosts group with parent morton index + auto it_first_ghost = tree.end_mine_cells(level_child); + build_dependencies<dimension>(it_first_ghost, tree.end_cells(level_child), parent_morton_index, + dependencies_out); + // io::print(std::clog, "upward(receive) dependencies(out) ", dependencies_out); + // io::print(std::cout, "upward(receive) dependencies(out) ", dependencies_out); + // std::clog << std::flush; +// dependencies_out +#pragma omp 
task default(none) shared(std::cout, std::clog) \ + firstprivate(ptr_comm, rank, level, tag_data, tag_nb, mpi_int_type, size, dimension, it_first_ghost) \ + depend(iterator(std::size_t it = 0 : dependencies_out.size()), out : (dependencies_out[it])[0]), \ + depend(inout : ptr_tree[0]) priority(prio) + { + // std::clog << "upward(task receive(" << level << ")) start \n"; + int count{0}; + std::vector<value_type> buffer(size); + ptr_comm->recv(&count, 1, mpi_int_type, rank + 1, tag_nb); + // std::clog << "upward(task receive(" << level << ")) " << count << " cell(s)\n" << std::flush; + + auto mpi_type = cpp_tools::parallel_manager::mpi::get_datatype<value_type>(); + + cpp_tools::parallel_manager::mpi::request recept_mpi_status( + ptr_comm->irecv(buffer.data(), size, mpi_type, rank + 1, tag_data)); + cpp_tools::parallel_manager::mpi::request::waitall(1, &recept_mpi_status); + + // ptr_comm->recv(buffer.data(), size, mpi_type, rank + 1, tag_data); + + // std::cout << "upward(task receive) buffer:"; + // for(int i = 0; i < buffer.size(); ++i) + // { + // std::cout << " " << buffer[i]; + // } + // std::cout << std::endl << std::flush; + // first right ghost + auto it_group = it_first_ghost; + auto it_cell = it_group->get()->begin(); + // linear iterator on cells + auto next1 = scalfmm::component::generate_linear_iterator(1, it_group, it_cell); + auto it = std::begin(buffer); + + for(int i = 0; i < count; ++i, next1()) + { + // copy the multipoles in the buffer + auto& m = it_cell->multipoles(); + auto nb_m = m.size(); + // std::clog << "upward((" << level << ")) cell index: " << it_cell->index() << " level " + // << it_cell->csymbolics().level << "\n" + // << std::flush; + for(std::size_t i{0}; i < nb_m; ++i) + { + auto& ten = m.at(i); + // std::cout << "upward() ten(before) " << ten << std::endl << std::flush; + std::copy(it, it + ten.size(), std::begin(ten)); + it += ten.size(); + // std::cout << "upward() ten(after) " << ten << std::endl << std::flush; + } + } + // std::clog << "upward(task receive(" << level << ")) end \n"; + } // end task + } + // else + // { + // std::cout << "no receive comm last_index_child " << last_index_child << " next child " + // << first_index_child_next << std::endl + // << std::flush; + // } } - // Add task dependencies + } + + template<typename Tree> + void prepare_comm_up(Tree tree, const int level) + { + using value_type = typename Tree::base_type::cell_type::value_type; + using dep_type = typename Tree::group_of_cell_type::symbolics_type::ptr_multi_dependency_type; + + static constexpr std::size_t dimension = Tree::base_type::box_type::dimension; + static constexpr int nb_inputs = Tree::cell_type::storage_type::inputs_size; // - // We receive the cells (at most 2^d - 1) to have all the children of - // the last father cell I own. These cells go into the first phantom group on the right. - // dep(out) these cells - // dep(out) group_parent_dep[0]?? 
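Both communication tasks derive their OpenMP dependencies with `build_dependencies`, which walks the sorted groups and keeps only the leading ones whose first Morton index still has the requested parent. A stand-alone sketch of that filtering, with a toy group type in place of scalfmm's cell groups:

```cpp
// Keep the leading groups whose starting Morton index has the requested parent;
// the scan stops at the first group that belongs to another parent.
// Stand-in group type, not scalfmm's group_of_cells.
#include <cstdint>
#include <iostream>
#include <vector>

struct group
{
    std::int64_t starting_index;   // first Morton index stored in the group
    double multipoles[8];          // dependency address stands for the group's data
};

template<int dimension>
void build_dependencies(std::vector<group>& groups, std::int64_t parent_morton_index,
                        std::vector<double*>& dependencies)
{
    for(auto& grp: groups)
    {
        if(parent_morton_index == (grp.starting_index >> dimension))
        {
            dependencies.push_back(&grp.multipoles[0]);
        }
        else
        {
            break;   // groups are sorted by Morton index, nothing further can match
        }
    }
}

int main()
{
    constexpr int dimension = 3;
    // ghost groups sorted by Morton index; their parents are 5, 5 and 6
    std::vector<group> ghosts{{42, {}}, {44, {}}, {48, {}}};
    std::vector<double*> deps;
    build_dependencies<dimension>(ghosts, /*parent*/ 5, deps);
    std::cout << deps.size() << " group(s) depend on parent 5\n";   // prints 2
}
```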
- - auto it_group = tree.end_mine_cells(level_child); - auto gs = it_group->get()->size(); - int nb_grp_dep = std::min(static_cast<int>(nb_children / gs + 1), - static_cast<int>(std::distance(it_group, tree.end_cells(level_child)))); - auto it_group_parent = --(tree.end_mine_cells(level)); - auto it_grp = it_group; - for(int i = 0; i < nb_grp_dep; ++i, ++it_grp) - { - deps.push_back(&(it_grp->get()->ccomponent(0).cmultipoles(0))); - } + // number of theoretical children + constexpr int nb_children = math::pow(2, dimension); + static constexpr auto prio{omp::priorities::max}; + // + auto& para = tree.get_parallel_manager(); + auto& comm = para.get_communicator(); + auto ptr_comm = &comm; + auto rank = comm.rank(); + int nb_proc = comm.size(); + int tag_nb = 1200 + 10 * level; + int tag_data = 1201 + 10 * level; + auto mpi_int_type = cpp_tools::parallel_manager::mpi::get_datatype<int>(); + // level where the multipoles are known + auto level_child = level + 1; + // iterator on the child cell groups + auto it_first_group_of_child = tree.begin_mine_cells(level_child); + // get size of multipoles + auto it_first_cell_child = it_first_group_of_child->get()->begin(); + auto const& m1 = it_first_cell_child->cmultipoles(); + int size{int(nb_inputs * m1.at(0).size()) * nb_children}; //only nb_children -1 is needed in the worse case + + auto msg = tree.get_up_down_access(level); - auto group_parent_dep = it_group_parent->get()->ccomponent(0).cmultipoles(0); -#ifdef M2M_COMM_TASKS -#pragma omp task shared(rank, mpi_int_type, size, tag_data, tag_nb) \ - depend(iterator(std::size_t it = 0 : deps.size()), out : (deps[it])[0]) priority(prio) -#endif + auto const& distrib = tree.get_cell_distribution(level); + auto const& distrib_child = tree.get_cell_distribution(level_child); + bool send_data{false}, receive_data{false}; + + auto ptr_tree = &tree; + + if(rank > 0) { - int count{-1}; - comm.recv(&count, 1, mpi_int_type, rank + 1, tag_nb); - // use a recv - if(count > 0) - { - std::vector<value_type> buffer(size); - auto mpi_type = cpp_tools::parallel_manager::mpi::get_datatype<value_type>(); - comm.recv(buffer.data(), size, mpi_type, rank + 1, tag_data); + // If we send the multipoles, they must have been updated by the M2M of the previous level! + // Check if you have to send something on the left (rank-1) + // We are at level l and the children are at level l+1. + // The ghost at the child level(on the right) + // must be updated if the parent index of the first child index is not equal to(lower than) + // the first index at level l. 
+ // serialization + // check if we have some child to send (if there exists they starts at the first group) - std::cout << std::endl; - auto it_group = tree.end_mine_cells(level_child); - auto it_cell = it_group->get()->begin(); - auto next1 = scalfmm::component::generate_linear_iterator(1, it_group, it_cell); - auto it = std::begin(buffer); + // first_index_child = morton index of the first child cell (at level = level_child) + auto first_index_child = distrib_child[rank][0]; + // index_parent = first index of the cell at current level on my process + auto index_parent = distrib[rank][0]; + // parent_morton_index = last index of the cell on previous process at current level + auto previous_parent_morton_index = distrib[rank - 1][1] - 1; + // first_index_child = morton index of the first child cell (at level = level_child) + auto last_index_child_previous = distrib_child[rank - 1][1] - 1; + // + send_data = (first_index_child >> dimension) == previous_parent_morton_index; - for(int i = 0; i < count; ++i, next1()) - { - // copy the multipoles in the buffer - auto& m = it_cell->multipoles(); + if(send_data) + { + std::clog << "upward(task send(" << level << ")) start \n"; + int count{0}; + std::clog << "upward(task send(" << level << ")) index " << first_index_child + << " parent_of_first_index_child " << previous_parent_morton_index << std::endl + << std::flush; + // index is now the parent of the first child + auto index = first_index_child >> dimension; + // I have to send + // find the number of children to send (get pointer on multipoles !!) + // Construct an MPI datatype - auto nb_m = m.size(); - for(std::size_t i{0}; i < nb_m; ++i) + // serialization + auto it_group = it_first_group_of_child; + // + auto it_cell = it_first_group_of_child->get()->begin(); + auto next = scalfmm::component::generate_linear_iterator(1, it_group, it_cell); + // + // compute the number of cells to send and copy the multipoles in the buffer + for(int i = 0; i < nb_children - 1; ++i, next()) + { + if(previous_parent_morton_index < (it_cell->index() >> dimension)) { - auto& ten = m.at(i); - std::copy(it, it + ten.size(), std::begin(ten)); - it += ten.size(); + break; } + std::clog << "upward(task send) Check children P " << index << " C " << it_cell->index() + << " level " << it_cell->csymbolics().level << std::endl + << std::flush; + ++count; } + std::clog << "upward(task send) nb_send = " << count << std::endl; + ptr_comm->isend(&count, 1, mpi_int_type, rank - 1, tag_nb); + + msg.set_nb_child_to_send(count); } } - } + if(rank < nb_proc - 1) + { + // last_index_child = morton index of the last child cell (at level = level_child) + auto last_index_child = distrib_child[rank][1] - 1; + // parent_morton_index = last index of the cell on previous process at current level + auto parent_morton_index = distrib[rank][1] - 1; + // first_index_child_next = morton index of the first child cell (at level = level_child) on next mpi process + auto first_index_child_next = distrib_child[rank + 1][0]; + receive_data = (last_index_child >> dimension) == (first_index_child_next >> dimension); + if(receive_data) + { + // std::cout << "upward receive comm last_index_child " << last_index_child << " parent " + // << (last_index_child >> dimension) << " next child " << first_index_child_next + // << " its parent " << (first_index_child_next >> dimension) << std::endl + // << std::flush; - /** - * @brief This function constructs the local approximation for all the cells of the tree by applying the operator m2m. 
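`prepare_comm_up` only pre-exchanges the number of child cells to move and stores it in the tree's up/down access object, so a later data exchange can size its buffers without a second handshake. A minimal sketch of that two-phase split, with a toy counts holder standing in for `UpDownDataAccess` (hypothetical usage, not the scalfmm API):

```cpp
// Split the exchange in two phases: first agree on the number of child cells,
// then move the actual multipole data using the stored count to size buffers.
// The counts holder is a toy stand-in for the tree's up/down access object.
#include <iostream>
#include <vector>

class up_down_counts
{
    int nb_child_send{0};
    int nb_child_receive{0};

  public:
    void set_nb_child_to_send(int n) { nb_child_send = n; }
    void set_nb_child_to_receive(int n) { nb_child_receive = n; }
    int get_nb_child_to_send() const { return nb_child_send; }
    int get_nb_child_to_receive() const { return nb_child_receive; }
};

int main()
{
    constexpr int multipole_size = 16;   // values per child cell (toy value)
    up_down_counts msg;

    // phase 1 (prepare): in the real code this count arrives from the neighbour rank
    msg.set_nb_child_to_receive(3);

    // phase 2 (data exchange): the receive buffer is sized from the stored count
    std::vector<double> buffer(msg.get_nb_child_to_receive() * multipole_size);
    std::cout << "receive buffer holds " << buffer.size() << " values\n";   // 48
}
```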
- * - * @tparam Tree - * @tparam Approximation - * - * @param tree the tree target - * @param approximation the approximation to construct the local approximation - */ + std::clog << std::flush; + { + std::clog << "upward(task receive(" << level << ")) start \n"; + int count{0}; + std::vector<value_type> buffer(size); + ptr_comm->recv(&count, 1, mpi_int_type, rank + 1, tag_nb); + std::clog << "upward(task receive(" << level << ")) " << count << " cell(s)\n" << std::flush; + msg.set_nb_child_to_receive(count); + } + } + } + }; + /// @brief This function constructs the local approximation for all the cells of the tree by applying the operator + /// m2m + /// + /// @param tree the tree target + /// @param approximation the approximation to construct the local approximation + /// template<typename Tree, typename Approximation> inline auto upward(Tree& tree, Approximation const& approximation) -> void { @@ -207,14 +430,29 @@ namespace scalfmm::algorithms::mpi::pass // upper working level is const int top_height = tree.box().is_periodic() ? 0 : 2; - + // const int start_duplicated_level = tree.start_duplicated_level(); + // + // int top = start_duplicated_level < 0 ? top_height : start_duplicated_level - 1; int top = top_height; for(int level = leaf_level - 1; level >= top /*top_height*/; --level) // int because top_height could be 0 { + // std::cout << "M2M : " << level + 1 << " -> " << level << std::endl << std::flush; + // start_communications(level, tree); + // std::cout << " end comm " << std::endl << std::flush; omp::pass::upward_level(level, tree, approximation); + // std::cout << " end upward_level " << level << std::endl << std::flush; } + // std::cout << "end upward " << std::endl << std::flush; + + // + + // for(int level = start_duplicated_level; level >= top_height; --level) // int because top_height could be 0 + // { + // std::cout << "Level duplicated (seq): " << level << std::endl; + // upward_level(level, tree, approximation); + // } } } // namespace scalfmm::algorithms::mpi::pass diff --git a/include/scalfmm/algorithms/omp/direct.hpp b/include/scalfmm/algorithms/omp/direct.hpp index e11a58abf57208db8453b15af27bbe5d32c45dd3..07afa569c2572dbd8dd85b6648e210baed516bd5 100644 --- a/include/scalfmm/algorithms/omp/direct.hpp +++ b/include/scalfmm/algorithms/omp/direct.hpp @@ -52,8 +52,11 @@ namespace scalfmm::algorithms::omp::pass const auto& matrix_kernel = nearfield.matrix_kernel(); // loop on the groups - auto begin_groups{std::get<0>(begin)}; - const auto end_groups{std::get<0>(end)}; + // auto begin_groups{std::get<0>(begin)}; + // const auto end_groups{std::get<0>(end)}; + auto begin_groups{tree.begin_mine_leaves()}; + auto end_groups{tree.end_leaves()}; + // auto end_groups{tree.end_mine_leaves()}; const auto prio_big{priorities::p2p_big}; const auto prio_small{priorities::p2p_small}; if(mutual) @@ -94,7 +97,9 @@ namespace scalfmm::algorithms::omp::pass } } // - begin_groups = std::get<0>(begin); + // begin_groups = std::get<0>(begin); + begin_groups = tree.begin_mine_leaves(); + end_groups = tree.end_mine_leaves(); while(begin_groups != end_groups) { const auto current_group_ptr_particles = (*begin_groups).get()->depends_update(); diff --git a/include/scalfmm/algorithms/omp/downward.hpp b/include/scalfmm/algorithms/omp/downward.hpp index 759a0c9b9f2527f07cadf1cdb753060614bda472..236647851675b92436bdb231203a22d1deb8f97d 100644 --- a/include/scalfmm/algorithms/omp/downward.hpp +++ b/include/scalfmm/algorithms/omp/downward.hpp @@ -33,6 +33,7 @@ namespace 
scalfmm::algorithms::omp::pass using interpolator_type = typename std::decay_t<ApproximationType>; using value_type = typename interpolator_type::value_type; + static constexpr auto prio{priorities::l2l}; static constexpr auto dimension = interpolator_type::dimension; // Get the index of the corresponding child-parent interpolator @@ -40,25 +41,28 @@ namespace scalfmm::algorithms::omp::pass (approximation.cell_width_extension() < std::numeric_limits<value_type>::epsilon()) ? 2 : level; // // parent level // iterator on the groups of cells (current level) - auto group_of_cell_begin = tree.begin_mine_cells(level); + auto group_of_cell_begin = tree.begin_cells(level); auto group_of_cell_end = tree.end_mine_cells(level); // // iterator on the groups of cells (child level) - auto group_of_child_cell_begin = tree.begin_cells(level + 1); - auto group_of_child_cell_end = tree.end_cells(level + 1); + auto group_of_child_cell_begin = tree.begin_mine_cells(level + 1); + auto group_of_child_cell_end = tree.end_mine_cells(level + 1); auto start_range_dependencies{group_of_cell_begin}; auto end_range_dependencies{group_of_cell_begin}; - static constexpr auto prio{priorities::l2l}; - + // We iterate on the group of child cells while(group_of_child_cell_begin != group_of_child_cell_end) { using ptr_parent_groups_type = std::decay_t<decltype(group_of_cell_begin->get())>; - auto group_child = group_of_child_cell_begin->get(); - auto group_child_raw = &group_child->ccomponent(0).clocals(0); - auto child_group_starting_morton_index = group_child->csymbolics().starting_index; - auto child_group_ending_morton_index = group_child->csymbolics().ending_index; + auto ptr_group_child = group_of_child_cell_begin->get(); + // For the dependencies + auto group_child_raw = &ptr_group_child->ccomponent(0).clocals(0); + auto child_group_starting_morton_index = ptr_group_child->csymbolics().starting_index; + auto child_group_ending_morton_index = ptr_group_child->csymbolics().ending_index; + // std::cout << " group cells in [" << child_group_starting_morton_index << ", " + // << child_group_ending_morton_index << "[" << std::endl; + // auto parent_starting_morton_index = child_group_starting_morton_index >> dimension; auto parent_ending_morton_index = ((child_group_ending_morton_index - 1) >> dimension) + 1; @@ -66,39 +70,47 @@ namespace scalfmm::algorithms::omp::pass std::tie(start_range_dependencies, end_range_dependencies) = index::get_parent_group_range( parent_starting_morton_index, parent_ending_morton_index, start_range_dependencies, group_of_cell_end); + auto start_range_dependencies_tmp{start_range_dependencies}; const auto end_range_dependencies_tmp{end_range_dependencies}; while(start_range_dependencies != end_range_dependencies) { + // std::cout << " --> parent cells in [" << (*start_range_dependencies)->csymbolics().starting_index + // << ", " << (*start_range_dependencies)->csymbolics().ending_index << "[" << std::endl; parent_dependencies.push_back(&(*start_range_dependencies)->ccomponent(0).clocals(0)); - // parent_groups.push_back(start_range_dependencies->get()); ++start_range_dependencies; } start_range_dependencies = --end_range_dependencies; // clang-format off -#pragma omp task untied default(none) firstprivate(group_child, start_range_dependencies_tmp, end_range_dependencies_tmp, level_interpolator_index) \ - shared(approximation) \ - depend(iterator(it = 0 : std::size(group_child->csymbolics().group_dependencies_l2l_in)), \ - in : (group_child->csymbolics().group_dependencies_l2l_in.at(it))[0]) 
depend(inout : group_child_raw[0]) \ +#pragma omp task untied default(none) firstprivate(ptr_group_child, start_range_dependencies_tmp, end_range_dependencies_tmp, level_interpolator_index,group_child_raw) \ + shared(approximation, std::cout) \ + depend(iterator(it = 0 : std::size(ptr_group_child->csymbolics().group_dependencies_l2l_in)), \ + in : (ptr_group_child->csymbolics().group_dependencies_l2l_in.at(it))[0]) depend(inout : group_child_raw[0]) \ priority(prio) // clang-format on { // Can be a task(in:iterParticles, out:iterChildCells ...) + // io::print("l2l(task) dependencies(transfer)(in): ", + // ptr_group_child->csymbolics().group_dependencies_l2l_in); + // std::cout << "l2l(task) run task dep(inout) on group group_child_raw " << group_child_raw << std::endl + // << std::flush; std::vector<ptr_parent_groups_type> parent_groups; while(start_range_dependencies_tmp != end_range_dependencies_tmp) { parent_groups.push_back(start_range_dependencies_tmp->get()); ++start_range_dependencies_tmp; } - for(std::size_t cell_index = 0; cell_index < group_child->csymbolics().number_of_component_in_group; + for(std::size_t cell_index = 0; cell_index < ptr_group_child->csymbolics().number_of_component_in_group; ++cell_index) { - auto& child_cell = group_child->component(cell_index); + auto& child_cell = ptr_group_child->component(cell_index); static constexpr auto number_of_child = math::pow(2, dimension); auto child_morton_index{child_cell.index()}; auto parent_morton_index{child_morton_index >> dimension}; + // std::cout << " cell index " << child_morton_index << " its parent_morton_index " + // << parent_morton_index << std::endl; for(auto p: parent_groups) { @@ -107,7 +119,8 @@ namespace scalfmm::algorithms::omp::pass int parent_index_in_group = p->component_index(parent_morton_index); assertm(parent_index_in_group != -1, "Upward pass: parent cell not found!"); auto const& parent_cell = p->ccomponent(std::size_t(parent_index_in_group)); - + // std::cout << " parent found " << parent_cell.index() << " locals " + // << parent_cell.clocals().at(0) << std::endl; const std::size_t child_index = child_morton_index & (number_of_child - 1); l2l(approximation, parent_cell, child_index, child_cell, level_interpolator_index); } diff --git a/include/scalfmm/algorithms/omp/leaf_to_cell.hpp b/include/scalfmm/algorithms/omp/leaf_to_cell.hpp index f1b53a405d3c2a89c3d8e23602941292ea5bc584..d6b251490eda627a8b380d788018399539cfeef7 100644 --- a/include/scalfmm/algorithms/omp/leaf_to_cell.hpp +++ b/include/scalfmm/algorithms/omp/leaf_to_cell.hpp @@ -53,9 +53,10 @@ namespace scalfmm::algorithms::omp::pass auto group_cell_raw = &(*group_of_cell_begin)->ccomponent(0).cmultipoles(0); // clang-format off #pragma omp task untied default(none) firstprivate(group_of_leaf_begin, group_of_cell_begin, group_cell_raw) \ - shared(approximation, far_field) depend(inout : group_cell_raw[0]) priority(prio) + shared(approximation, far_field, std::cout) depend(inout : group_cell_raw[0]) priority(prio) // clang-format on { + // std::cout << "leaf_to_Cell(task) dependencies(out) " << group_cell_raw << std::endl << std::flush; // Can be a task(in:iterParticles, out:iterCells) auto leaf_begin = (*group_of_leaf_begin)->cbegin(); auto cell_begin = (*group_of_cell_begin)->begin(); diff --git a/include/scalfmm/algorithms/omp/transfer.hpp b/include/scalfmm/algorithms/omp/transfer.hpp index d24931582f7230fe6cfceb6a84fd41c74e76f903..c8406be836e043298d55add80872d8a2b685b516 100644 --- a/include/scalfmm/algorithms/omp/transfer.hpp +++ 
b/include/scalfmm/algorithms/omp/transfer.hpp @@ -63,21 +63,40 @@ namespace scalfmm::algorithms::omp::pass /// loop on the groups at level level const auto num_threads{omp_get_num_threads()}; for(auto cell_target_level_it = begin_cell_target_level_it; cell_target_level_it != end_cell_target_level_it; - ++cell_target_level_it, ++num) + ++cell_target_level_it) { // get() because cell_target_level_it is a shared pointer auto group_ptr = cell_target_level_it->get(); // dependence on the first local of the group auto ptr_local_raw = &(group_ptr->ccomponent(0).clocals(0)); - static constexpr auto prio{priorities::m2l}; + auto prio{priorities::m2l}; +#ifdef SCALFMM_USE_MPI + // Increase th priority on the last group (For L2L in MPI) + if(cell_target_level_it == (end_cell_target_level_it - 1)) + { + prio = priorities::max; + } +#endif + // + // std::cout << "M2L level = " << level << std::endl << std::flush; + // io::print("M2L dependencies(transfer)(in): ", group_ptr->csymbolics().group_dependencies_m2l); + // std::cout << "M2L dep(inout) on groupe ptr_local_raw " << ptr_local_raw << std::endl << std::flush; + + // // clang-format off -#pragma omp task untied default(none) firstprivate(group_ptr, ptr_local_raw) shared(approximation, buffers) \ +#pragma omp task untied default(none) firstprivate(group_ptr, ptr_local_raw, level) shared(approximation, buffers, std::clog) \ depend(iterator(it = 0 : std::size(group_ptr->csymbolics().group_dependencies_m2l)), \ in : (group_ptr->csymbolics().group_dependencies_m2l.at(it)[0])) depend(inout : ptr_local_raw[0]) priority(prio) // clang-format on { const auto thread_id{omp_get_thread_num()}; + // io::print(std::clog, + // "m2l(task) dependencies(transfer)(in): ", group_ptr->csymbolics().group_dependencies_m2l); + // std::clog << "m2l(task) start task dep(inout) on groupe ptr_local_raw " << ptr_local_raw + // << " level=" << level << std::endl + // << std::flush; + /////////////////////////////////////////////////////////////////////////////////////// // loop on the leaves of the current group for(std::size_t index_in_group{0}; index_in_group < std::size(*group_ptr); ++index_in_group) @@ -90,7 +109,12 @@ namespace scalfmm::algorithms::omp::pass // post-processing the leaf if necessary approximation.apply_multipoles_postprocessing(target_cell, *buffers.at(thread_id), thread_id); approximation.buffer_reset(*buffers.at(thread_id)); + // std::clog << "m2l(task) end cell index " << target_cell.index() << " locals " + // << target_cell.locals().at(0) << std::endl; } + // std::clog << "m2l(task) end task dep(inout) on groupe ptr_local_raw " << ptr_local_raw + // << " level=" << level << std::endl + // << std::flush; } // end pragma task /// post-processing the group if necessary } diff --git a/include/scalfmm/algorithms/omp/upward.hpp b/include/scalfmm/algorithms/omp/upward.hpp index 7738ae44c0dacf661120e07f4352807ac42408e7..f5e70fed7ea74761497a7f9438d16d300cfc3f26 100644 --- a/include/scalfmm/algorithms/omp/upward.hpp +++ b/include/scalfmm/algorithms/omp/upward.hpp @@ -5,7 +5,7 @@ #ifndef SCALFMM_ALGORITHMS_OMP_UPWARD_HPP #define SCALFMM_ALGORITHMS_OMP_UPWARD_HPP -#ifdef _OPENMP +#include <limits> #include "scalfmm/operators/m2m.hpp" #include "scalfmm/operators/tags.hpp" @@ -13,7 +13,7 @@ #include "scalfmm/utils/massert.hpp" #include "scalfmm/utils/math.hpp" -#include <limits> +#ifdef _OPENMP #include <omp.h> namespace scalfmm::algorithms::omp::pass @@ -43,8 +43,10 @@ namespace scalfmm::algorithms::omp::pass (approximation.cell_width_extension() < 
std::numeric_limits<value_type>::epsilon()) ? 2 : level; // // iterator on the groups of cells (child level) - auto group_of_child_cell_begin = tree.begin_cells(level + 1); + // + auto group_of_child_cell_begin = tree.begin_mine_cells(level + 1); auto group_of_child_cell_end = tree.end_cells(level + 1); + // iterator on the groups of cells (current level) auto group_of_cell_begin = tree.begin_mine_cells(level); auto group_of_cell_end = tree.end_mine_cells(level); @@ -75,13 +77,17 @@ namespace scalfmm::algorithms::omp::pass start_range_dependencies = --end_range_dependencies; // clang-format off - #pragma omp task untied default(none) firstprivate( group_parent, start_range_dependencies_tmp, end_range_dependencies_tmp,level_interpolator_index) \ + #pragma omp task untied default(none) firstprivate( group_parent, start_range_dependencies_tmp, end_range_dependencies_tmp,level_interpolator_index, group_parent_raw) \ shared(std::cout, approximation ) \ depend(iterator(std::size_t it = 0 : grp_parent_sym.group_dependencies_m2m_in.size()), \ in: (grp_parent_sym.group_dependencies_m2m_in.at(it))[0]) \ depend(inout: group_parent_raw[0]) priority(prio) // clang-format on { + // io::print("m2m(task) dependencies(in): ", group_parent->csymbolics().group_dependencies_m2m_in); + // std::cout << "m2m(task) run task dep(inout) on group group_parent_raw " << group_parent_raw + // << std::endl + // << std::flush; std::vector<ptr_child_groups_type> child_groups; while(start_range_dependencies_tmp != end_range_dependencies_tmp) @@ -106,6 +112,8 @@ namespace scalfmm::algorithms::omp::pass m2m(approximation, child_cell, child_index, parent_cell, level_interpolator_index); } } + // std::cout << "m2m(task) end cell index " << parent_cell.index() << " multipoles " + // << parent_cell.multipoles().at(0) << std::endl; } approximation.apply_multipoles_preprocessing(parent_cell, omp_get_thread_num()); } diff --git a/include/scalfmm/algorithms/sequential/sequential.hpp b/include/scalfmm/algorithms/sequential/sequential.hpp index cb4b53651e3e9f9ad9ef8ccbffa11fcb5ccc6c1e..b002c930499ed743fbe6da8a74d2db803e602853 100644 --- a/include/scalfmm/algorithms/sequential/sequential.hpp +++ b/include/scalfmm/algorithms/sequential/sequential.hpp @@ -204,6 +204,18 @@ namespace scalfmm::algorithms::sequential timers["p2p"].tac(); } } +#ifdef _DEBUG_BLOCK_DATA + std::clog << "\n blocks after direct\n"; + int tt{0}; + auto group_of_leaves = tree_target.vector_of_leaf_groups(); + + for(auto pg: group_of_leaves) + { + std::clog << "block index " << tt++ << std::endl; + pg->cstorage().print_block_data(std::clog); + } + std::clog << " ---------------------------------------------------\n"; +#endif // print time of each pass if constexpr(options::has(s, options::timit)) diff --git a/include/scalfmm/interpolation/barycentric/barycentric_interpolator.hpp b/include/scalfmm/interpolation/barycentric/barycentric_interpolator.hpp index f6786301390edfcc676a062acb2654c65e6bbbe9..d631bb50b06f821049b3053483c0e4504afe2585 100644 --- a/include/scalfmm/interpolation/barycentric/barycentric_interpolator.hpp +++ b/include/scalfmm/interpolation/barycentric/barycentric_interpolator.hpp @@ -9,6 +9,7 @@ #include "scalfmm/container/variadic_adaptor.hpp" #include "scalfmm/interpolation/grid_storage.hpp" #include "scalfmm/interpolation/interpolator.hpp" +#include "scalfmm/interpolation/m2l_handler.hpp" #include "scalfmm/interpolation/mapping.hpp" #include "scalfmm/interpolation/permutations.hpp" #include "scalfmm/interpolation/traits.hpp" @@ -349,6 +350,15 @@ 
namespace scalfmm::interpolation return xt::xarray<value_type>(std::vector{math::pow(order, dimension)}, value_type(1.)); #endif } + /** + * @brief Compute he memory in bytes used by the interpolator + * + * @return the memory used by the interpolator + */ + [[nodiscard]] inline auto memory_usage() const noexcept -> std::size_t const + { + return base_m2l_handler_type::memory_usage(); + } private: /** diff --git a/include/scalfmm/interpolation/chebyshev/chebyshev_interpolator.hpp b/include/scalfmm/interpolation/chebyshev/chebyshev_interpolator.hpp index ffea501c2fca7bda1d36bec3a0c9200135c7b813..5d45d38d60d2a526c7c92814d2dfbd9d3d250a48 100644 --- a/include/scalfmm/interpolation/chebyshev/chebyshev_interpolator.hpp +++ b/include/scalfmm/interpolation/chebyshev/chebyshev_interpolator.hpp @@ -9,6 +9,7 @@ #include "scalfmm/container/variadic_adaptor.hpp" #include "scalfmm/interpolation/grid_storage.hpp" #include "scalfmm/interpolation/interpolator.hpp" +#include "scalfmm/interpolation/m2l_handler.hpp" #include "scalfmm/interpolation/mapping.hpp" #include "scalfmm/interpolation/permutations.hpp" #include "scalfmm/interpolation/traits.hpp" @@ -364,6 +365,15 @@ namespace scalfmm::interpolation meta::looper_range<dimension>{}(generate_weights, starts, stops); return roots_weights; } + /** + * @brief ompute he memory in bytes used by the interpolator + * + * @return the memory used by the interpolator + */ + [[nodiscard]] inline auto memory_usage() const noexcept -> std::size_t const + { + return base_m2l_handler_type::memory_usage(); + } private: /** diff --git a/include/scalfmm/interpolation/interpolator.hpp b/include/scalfmm/interpolation/interpolator.hpp index 341aa998790fadc1abcbe999f0f9fabf01ef8735..0f4b1b1af22c825260f050fd31a032e6aa476cb1 100644 --- a/include/scalfmm/interpolation/interpolator.hpp +++ b/include/scalfmm/interpolation/interpolator.hpp @@ -7,7 +7,6 @@ #include "scalfmm/container/point.hpp" #include "scalfmm/interpolation/builders.hpp" -#include "scalfmm/interpolation/m2l_handler.hpp" #include "scalfmm/interpolation/mapping.hpp" #include "scalfmm/interpolation/permutations.hpp" #include "scalfmm/matrix_kernels/mk_common.hpp" @@ -522,48 +521,30 @@ namespace scalfmm::interpolation return *static_cast<derived_type*>(this); } - private: /** - * @brief number of terms of the expansion (1d) + * @brief ompute he memory in bytes used by the interpolator * + * @return the memory used by the interpolator */ - const size_type m_order{}; + [[nodiscard]] inline auto memory_usage() const noexcept -> std::size_t const + { + return this->derived_cast().memory_usage(); + } - /** - * @brief number of modes m_order^dimension - * - */ - const size_type m_nnodes{}; + private: + const size_type m_order{}; ///< number of terms of the expansion (1d) - /** - * @brief height of the tree - * - */ - const size_type m_tree_height{}; + const size_type m_nnodes{}; ///< number of modes m_order^dimension - /** - * @brief width of the simulation box - * - */ - const value_type m_root_cell_width{}; + const size_type m_tree_height{}; ///< height of the tree - /** - * @brief width of the extension of the cell - * - */ - const value_type m_cell_width_extension{}; + const value_type m_root_cell_width{}; ///< width of the simulation box - /** - * @brief true if we use the cell extension - * - */ - const bool m_use_cell_width_extension{}; + const value_type m_cell_width_extension{}; ///< width of the extension of the cell - /** - * @brief - * - */ - array_type m_child_parent_interpolators{}; + const bool 
m_use_cell_width_extension{}; ///< true if we use the cell extension + + array_type m_child_parent_interpolators{}; ///< /** * @brief diff --git a/include/scalfmm/interpolation/m2l_handler.hpp b/include/scalfmm/interpolation/m2l_handler.hpp index e92e1122b18fa17bb08366be622c90a898813071..1a1b5fec45dba2a756f80ea93b6224903a3ed160 100644 --- a/include/scalfmm/interpolation/m2l_handler.hpp +++ b/include/scalfmm/interpolation/m2l_handler.hpp @@ -383,7 +383,44 @@ namespace scalfmm::interpolation { return m_interactions_matrices; } + /** + * @brief Compute he memory in bytes used by the interpolator + * + * @return the memory used by the interpolator + */ + [[nodiscard]] inline auto memory_usage() const noexcept -> std::size_t const + { + std::size_t memory{0}; + auto size = m_interactions_matrices.size(); + for(auto& mat: m_interactions_matrices) + { + // mat xtensor knxkm + // std::cout << cpt++ << " "; + for(int i = 0; i < mat.shape()[0]; ++i) + { + for(int j = 0; j < mat.shape()[1]; ++j) + { + auto K = mat.at(i, j); + // dense case + if constexpr(std::is_same_v<settings, options::dense_> or + std::is_same_v<settings, options::fft_>) + { + memory += K.size() * sizeof(typename k_tensor_type::value_type); + } + else if constexpr(std::is_same_v<settings, options::low_rank_>) + { + // Low rang (tuple of two matricies) + auto const& A = std::get<0>(K); + auto const& B = std::get<1>(K); + memory += (A.size() + B.size()) * + sizeof(typename std::tuple_element_t<0, k_tensor_type>::value_type); + } + } + } + } + return memory; + } /** * @brief * @@ -1234,12 +1271,8 @@ namespace scalfmm::interpolation } private: - /** - * @brief - * - */ std::vector<interaction_matrix_type, XTENSOR_DEFAULT_ALLOCATOR(interaction_matrix_type)> - m_interactions_matrices{}; + m_interactions_matrices{}; ///< The M2L matricies /** * @brief diff --git a/include/scalfmm/interpolation/uniform/uniform_interpolator.hpp b/include/scalfmm/interpolation/uniform/uniform_interpolator.hpp index 612e322fb4bafbedc21bf2bee402fb2b36e7a051..635d23eb2a2cc80237e0749943959a869e898b58 100644 --- a/include/scalfmm/interpolation/uniform/uniform_interpolator.hpp +++ b/include/scalfmm/interpolation/uniform/uniform_interpolator.hpp @@ -17,6 +17,7 @@ #include "scalfmm/interpolation/generate_circulent.hpp" #include "scalfmm/interpolation/grid_storage.hpp" #include "scalfmm/interpolation/interpolator.hpp" +#include "scalfmm/interpolation/m2l_handler.hpp" #include "scalfmm/interpolation/mapping.hpp" #include "scalfmm/interpolation/traits.hpp" #include "scalfmm/interpolation/uniform/uniform_storage.hpp" @@ -284,7 +285,7 @@ namespace scalfmm::interpolation } /** - * @brief + * @brief Compute the weights of the quadrature * * @param order * @return xt::xarray<value_type> @@ -293,6 +294,16 @@ namespace scalfmm::interpolation { return xt::xarray<value_type>(std::vector{math::pow(order, dimension)}, value_type(1.)); } + + /** + * @brief ompute he memory in bytes used by the interpolator + * + * @return the memory used by the interpolator + */ + [[nodiscard]] inline auto memory_usage() const noexcept -> std::size_t const + { + return base_m2l_handler_type::memory_usage(); + } }; /** @@ -662,7 +673,15 @@ namespace scalfmm::interpolation } return L * sum; } - + /** + * @brief ompute he memory in bytes used by the interpolator + * + * @return the memory used by the interpolator + */ + [[nodiscard]] inline auto memory_usage() const noexcept -> std::size_t const + { + return base_m2l_handler_type::memory_usage(); + } /** * @brief Returns the buffers initialized for 
the optimized fft * diff --git a/include/scalfmm/lists/utils.hpp b/include/scalfmm/lists/utils.hpp index 57643087baeb2866e26034091ac4713b337e3e6f..671b03bc27a3ee081459b42cb37d311e56e4c42a 100644 --- a/include/scalfmm/lists/utils.hpp +++ b/include/scalfmm/lists/utils.hpp @@ -926,6 +926,33 @@ namespace scalfmm::list [](auto const& a, auto const& b) { return a->index() < b->index(); }); }); } + // template<typename TREE> + // void clean_p2p_ghosts_interactions(TREE& tree) + // { + // auto begin_groups{tree.end_mine_leaves()}; + // const auto end_groups{tree.cend_leaves()}; + // while(begin_groups != end_groups) + // { + // // + // ++begin_groups; + // } + // for(auto group_leaf_iterator_begin = begin_groups; group_leaf_iterator_begin != end_groups; + // ++group_leaf_iterator_begin) + // { + // auto group = (*group_leaf_iterator_begin); + // auto& group_symbolics = group->symbolics(); + + // auto& out_of_group = group_symbolics.outside_interactions; + + // for(auto&& leaf: (*group_leaf_iterator_begin)->components()) + // { + // // get symbolics + // auto& leaf_symbolics = leaf.symbolics(); + // // remove the interactions inside the bloc + // leaf_symbolics.number_of_neighbors = 0; + // } + // } + // } } // namespace scalfmm::list #endif // SCALFMM_LISTS_UTIL_HPP diff --git a/include/scalfmm/operators/count_kernel/count_kernel.hpp b/include/scalfmm/operators/count_kernel/count_kernel.hpp index a7417529111b754d588c3ef00103ca25c614b53f..bd6e006737cb0ee8545e83eea44a48eebf4224be 100644 --- a/include/scalfmm/operators/count_kernel/count_kernel.hpp +++ b/include/scalfmm/operators/count_kernel/count_kernel.hpp @@ -82,6 +82,7 @@ namespace count_kernels * */ static constexpr int separation_criterion{1}; + const std::string name() const { return std::string("count kernel "); } }; /** diff --git a/include/scalfmm/parallel/comm_access.hpp b/include/scalfmm/parallel/comm_access.hpp new file mode 100644 index 0000000000000000000000000000000000000000..d7ff3653cb26a03d9204b5f46f7334d281ce3352 --- /dev/null +++ b/include/scalfmm/parallel/comm_access.hpp @@ -0,0 +1,100 @@ +#pragma once + +#include <vector> + +#ifdef SCALFMM_USE_MPI +#include <mpi.h> +#endif +template<typename morton_type, typename grp_access_type> +class transferDataAccess +{ + private: + // vector per level and per process + std::vector<std::vector<std::vector<morton_type>>> m_send_morton; + std::vector<std::vector<std::vector<grp_access_type>>> m_receive_cells_access; + std::vector<std::vector<std::vector<grp_access_type>>> m_send_cells_access; + +#ifdef SCALFMM_USE_MPI + std::vector<std::vector<MPI_Datatype>> m_send_multipoles_type; + std::vector<std::vector<MPI_Datatype>> m_receive_multipoles_type; +#endif + + public: + auto inline get_send_multipole_types(const int& level) -> std::vector<MPI_Datatype>& + { + return m_send_multipoles_type[level]; + } + + auto inline print_send_multipole_types(const int& level) -> void + { + auto const& type = m_send_multipoles_type[level]; + for(int p = 0; p < type.size(); ++p) + { + std::cout << " ptr_data_type(" << p << ") " << &(type[p]) << " level: " << level << std::endl + << std::flush; + } + } + auto inline get_receive_multipole_types(const int& level) -> std::vector<MPI_Datatype>& + { + return m_receive_multipoles_type[level]; + } + auto inline send_morton_indexes(int const& level, int const& proc) -> std::vector<morton_type>& + { + return m_send_morton[level][proc]; + } + auto inline send_morton_indexes(int const& level) -> std::vector<std::vector<morton_type>>& + { + return m_send_morton[level]; + 
} + auto inline receive_cells_access(int const& level) -> std::vector<std::vector<grp_access_type>>& + { + return m_receive_cells_access[level]; + } + + auto inline send_cells_access(int const& level) -> std::vector<std::vector<grp_access_type>>& + { + return m_send_cells_access[level]; + } + + transferDataAccess(const int tree_height, const int nb_proc) + { + m_receive_multipoles_type.resize(tree_height); + m_send_multipoles_type.resize(tree_height); + m_send_morton.resize(tree_height); + for(auto& vec: m_send_morton) + { + vec.resize(nb_proc); + } + m_receive_cells_access.resize(tree_height); + for(auto& vec: m_receive_cells_access) + { + vec.resize(nb_proc); + } + m_send_cells_access.resize(tree_height); + for(auto& vec: m_send_cells_access) + { + vec.resize(nb_proc); + } + } +}; + +class UpDownDataAccess +{ + // vector per level and per process + bool done{false}; // if built + int nb_child_send{0}; // number of child to send + int nb_child_receive{0}; // number of child to receive + + public: + UpDownDataAccess() + : done(false) + , nb_child_send(0) + , nb_child_receive(0) + { + } + bool is_done() { return done; } + auto get_nb_child_to_send() const { return nb_child_send; } + void set_nb_child_to_send(const int n) { nb_child_send = n; } + auto get_nb_child_to_receive() const { return nb_child_receive; } + void set_nb_child_to_receive(const int n) { nb_child_receive = n; } +}; \ No newline at end of file diff --git a/include/scalfmm/parallel/mpi/comm.hpp b/include/scalfmm/parallel/mpi/comm.hpp index bf1219e99ec1c61513223ac3eae083f649748d0a..f26399e2a7a6ed52543bac6e2eb6c8d1c06af9e4 100644 --- a/include/scalfmm/parallel/mpi/comm.hpp +++ b/include/scalfmm/parallel/mpi/comm.hpp @@ -1,9 +1,8 @@ -// -------------------------------- -// See LICENCE file at project root -// File : scalfmm/parallel/mpi/comm.hpp -// -------------------------------- #pragma once +#include <stdexcept> +#include <vector> + #include "scalfmm/container/particle.hpp" #include "scalfmm/meta/utils.hpp" #include "scalfmm/parallel/utils.hpp" @@ -12,59 +11,106 @@ #include <mpi.h> -#include <vector> - namespace scalfmm::parallel::comm { using comm_type = cpp_tools::parallel_manager::mpi::communicator; + // Debug functions + namespace debug + { + template<typename TreeType> + void print_all_cells(TreeType& tree, int level, std::string title) + { + std::cout << "M2L -- level " << level << " -- " << std::endl; + scalfmm::component::for_each_mine_component(tree.begin_cells(level), tree.end_cells(level), + [&title](auto const& cell) + { + std::cout << title << " morton " << cell.index() + << " multipoles " + << cell.transfer_multipoles().at(0) << " locals " + << cell.locals().at(0) << std::endl + << std::flush; + }); + } + template<typename VectorAccesType> + inline void set_values(VectorAccesType& cells_to_send_access) + { + std::cout << " debug::set_values " << cells_to_send_access.size() << std::endl << std::flush; + // iterate on the cells + int start{20}; + for(auto access: cells_to_send_access) + { + auto& cell = (*(access.first))->component(access.second); + + // std::cout << " morton to find " << index_to_send[idx] << " cell found " + // << (*grp_ptr)->component(pos).csymbolics().morton_index << '\n'; + auto& m = cell.transfer_multipoles(); + auto nb_m = m.size(); + // std::cout << " nb_m" << m.size() <<std::endl; + for(std::size_t i{0}; i < nb_m; ++i) + { + auto& ten = m.at(i); + // std::copy(std::begin(ten), std::end(ten), it); + ten[0] = start; + std::cout << " start " << start << " " << ten[0] << std::endl; + 
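                    // advance the debug marker so each multipole written by set_values
                    // carries a distinct, easily recognisable value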
++start; + } + } + } + } // namespace debug + /** * @brief Determines the Morton index vector to be received from processor p (STEP 1) - * - * Determines the Morton index vector to be received from processor p? In addition, for each Morton index we - * store the cell, i.e. a pointer to its group and the index within the group (group_ptr, index). This will - * enable us to insert the multipoles received from processor p directly into the cell. - * - * leaf_to_receive_access: a vector of vector of pair (the iterator on the group ,the position of the cell in the group) - * leaf_to_receive_access[p] is the position vector of cells in groups whose Morton index comes from processor p - * leaf_to_receive_access[p][i] a pair (the iterator on the group ,the position of the cell in the group) - * vector of size nb_proc - * - nb_messages_to_receive: the number of morton indices to exchange with processor p - * - nb_messages_to_send: the number of morton indices to send tp processor p - * - morton_to_receive: the morton indices to exchange with processor p - * - * @tparam DistributionType - * @tparam IteratorType - * @tparam VectorOfVectorStructType - * @tparam VectorOfVectorType + * + * Determines the Morton index vector to be received from processor p. In addition, for each Morton index we + * store the cell, i.e. a pointer to its group and the index within the group (group_ptr, index). This will + * enable us to build the MPi data type the multipoles received from processor p directly into the cell. + * + * leaf_to_receive_access: a vector of vector of pair (the iterator on the group ,the position of the cell in the group) + * leaf_to_receive_access[p] is the position vector of cells in groups whose Morton index comes from processor p + * leaf_to_receive_access[p][i] a pair (the iterator on the group ,the position of the cell in the group) + * vector of size nb_proc + * - nb_messages_to_receive: the number of morton indices to exchange with processor p + * - nb_messages_to_send: the number of morton indices to send tp processor p + * - morton_to_receive: the morton indices to exchange with processor p + * + * @tparam distribution_type + * @tparam iterator_type + * @tparam vector_vector_struct_type + * @tparam vector_vector_type * @param[in] comm the mpi communicator * @param[in] begin_left_ghost The iterator of the first ghost on the left * @param[in] end_left_ghost The iterator of the last ghost on the left * @param[in] begin_right_ghost The iterator of the first ghost on the right * @param[in] end_right_ghost The iterator of the last ghost on the right - * @param[in] distrib the data distribution + * @param[in] distrib the data distribution * @param[out] nb_messages_to_receive the number of morton indices to exchange with processor p * @param[out] nb_messages_to_send the number of morton indices to send tp processor p * @param[out] leaf_to_receive_access For each component a direct access to it (iterator on group, position into the group) * @param[out] morton_to_receive for each process the vector of Morton indexes to receive - */ - template<typename DistributionType, typename IteratorType, typename VectorOfVectorStructType, - typename VectorOfVectorType> - inline auto start_step1(comm_type& comm, IteratorType begin_left_ghost, IteratorType end_left_ghost, - IteratorType begin_right_ghost, IteratorType end_right_ghost, - DistributionType const& distrib, std::vector<int>& nb_messages_to_receive, - std::vector<int>& nb_messages_to_send, VectorOfVectorStructType& leaf_to_receive_access, - 
VectorOfVectorType& morton_to_receive) -> void + */ + template<typename distribution_type, typename iterator_type, typename vector_vector_struct_type, + typename vector_vector_type> + inline void start_step1(comm_type& comm, iterator_type begin_left_ghost, iterator_type end_left_ghost, + iterator_type begin_right_ghost, iterator_type end_right_ghost, + distribution_type const& distrib, std::vector<int>& nb_messages_to_receive, + std::vector<int>& nb_messages_to_send, vector_vector_struct_type& leaf_to_receive_access, + vector_vector_type& morton_to_receive, bool verbose = false) { // We iterate on the ghosts - // function to fill the struture to_receive for groups between first_group_ghost and last_group_ghost - auto build_receive = [&nb_messages_to_receive, &leaf_to_receive_access, &distrib, - &morton_to_receive](auto first_group_ghost, auto last_group_ghost) + // function to fill the struture to_receive the cells for groups between first_group_ghost and last_group_ghost + auto build_receive = [&nb_messages_to_receive, &leaf_to_receive_access, &distrib, &morton_to_receive, + verbose](auto first_group_ghost, auto last_group_ghost) { + // if(verbose) + // std::cout << "step1 build_receive " << std::distance(first_group_ghost, last_group_ghost) << std::endl + // << std::flush; for(auto grp_ptr = first_group_ghost; grp_ptr != last_group_ghost; ++grp_ptr) { int idx{0}; + // if(verbose) + // std::cout << " idx= " << idx << std::endl << std::flush; // iterate on the cells for(auto const& component: (*grp_ptr)->components()) { @@ -73,94 +119,133 @@ namespace scalfmm::parallel::comm ++nb_messages_to_receive[i]; leaf_to_receive_access[i].push_back(std::make_pair(grp_ptr, idx)); morton_to_receive[i].push_back(morton); + // if(verbose) + // std::cout << " step 1 " << idx << " " << *grp_ptr << " " << morton << " proc " << i + // << std::endl; ++idx; } } }; // Start on the left ghosts + // if(verbose) + // std::cout << " Left \n" << std::flush; if(std::distance(begin_left_ghost, end_left_ghost) > 0) { build_receive(begin_left_ghost, end_left_ghost); } // Start on the ghosts on the right + // if(verbose) + // std::cout << " right \n" << std::flush; + if(std::distance(begin_right_ghost, end_right_ghost) > 0) { build_receive(begin_right_ghost, end_right_ghost); } - // io::print("step nb_messages_to_receive[" + std::to_string(p) + "] ", nb_messages_to_receive.data[p]); - // Do we need to sort them ? + // else + // { + // std::cout << " No ghost group" << std::endl << std::flush; + // } + // Faut-il les trier ??? 
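The count exchange that closes step 1 is, underneath the cpp_tools wrapper used in the `comm.alltoall` call below, a plain `MPI_Alltoall` with one integer per rank. A minimal stand-alone sketch of the same pattern, using a hypothetical `owner_of` helper and assuming a semi-open `[min, max)` ownership interval (neither is taken from the library):

```cpp
#include <mpi.h>

#include <array>
#include <cstdint>
#include <vector>

// Step-1 pattern in plain MPI: each rank counts how many Morton indices it must
// receive from every other rank, then a single MPI_Alltoall turns those counts
// into the number of indices it will have to send back. owner_of is a
// hypothetical helper mapping a Morton index onto the rank that owns it.
int owner_of(std::int64_t morton, std::vector<std::array<std::int64_t, 2>> const& distrib)
{
    for(std::size_t p = 0; p < distrib.size(); ++p)
    {
        // assumption: semi-open ownership interval [min, max) per rank
        if(morton >= distrib[p][0] && morton < distrib[p][1])
        {
            return static_cast<int>(p);
        }
    }
    return -1;
}

void exchange_counts(MPI_Comm comm, std::vector<std::int64_t> const& ghost_mortons,
                     std::vector<std::array<std::int64_t, 2>> const& distrib,
                     std::vector<int>& nb_to_receive, std::vector<int>& nb_to_send)
{
    int nb_proc{};
    MPI_Comm_size(comm, &nb_proc);
    nb_to_receive.assign(nb_proc, 0);
    nb_to_send.assign(nb_proc, 0);
    // ghosts are cells owned by another rank; bucket them by owner
    for(auto m: ghost_mortons)
    {
        ++nb_to_receive[owner_of(m, distrib)];
    }
    // one int per rank in each direction, mirroring comm.alltoall(...) in the code below
    MPI_Alltoall(nb_to_receive.data(), 1, MPI_INT, nb_to_send.data(), 1, MPI_INT, comm);
}
```

Step 2 then exchanges the index lists themselves with non-blocking `isend`/`irecv` pairs, matched by the tag 600 used in the code that follows.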
int p{0}; - // io::print("step 1 nb_messages_to_receive ", nb_messages_to_receive); - // std::cout << " morton_to_receive.size() " << morton_to_receive.size() <<std::endl; - // for (auto & vec : morton_to_receive){ - // - // auto last = std::unique(vec.begin(), vec.end()); - // vec.erase(last, vec.end()); - // io::print("step 1 morton_to_receive[" + std::to_string(p++) + "] ", vec); - - // } - /* - p = 0 ; - io::print("step 1 nb_messages_to_send ", nb_messages_to_send); - for (auto & vec : ){ - - auto last = std::unique(vec.begin(), vec.end()); - vec.erase(last, vec.end()); - io::print("step 1 [" + std::to_string(p++) + "] ", vec); - - } - */ + // if(verbose) + // io::print("step 1 nb_messages_to_receive ", nb_messages_to_receive); + // if(verbose) + // std::cout << " step1 morton_to_receive.size() " << morton_to_receive.size() << std::endl; + for(auto& vec: morton_to_receive) + { + // auto last = std::unique(vec.begin(), vec.end()); + // vec.erase(last, vec.end()); + if(verbose) + io::print("step 1 morton_to_receive[" + std::to_string(p++) + "] ", vec); + } + + p = 0; + // if(verbose) + // io::print("step 1 nb_messages_to_send ", nb_messages_to_send); + // if(verbose) + // { + // for(auto& vec: morton_to_receive) + // { + // io::print("step 1 morton_to_receive[" + std::to_string(p++) + "] ", vec); + // } + // } + //////////////////// /// Exchange the morton indexes with processor p auto mpi_int_type = cpp_tools::parallel_manager::mpi::get_datatype<int>(); + if(verbose) + io::print("step 1 nb_messages_to_receive ", nb_messages_to_receive); + // comm.barrier(); + // std::clog << "comm.alltoall\n"; comm.alltoall(nb_messages_to_receive.data(), 1, mpi_int_type, nb_messages_to_send.data(), 1, mpi_int_type); + // std::clog << "znd comm.alltoall\n"; + + if(verbose) + io::print("end step 1 nb_messages_to_send ", nb_messages_to_send); } /** * @brief We can now exchange the morton indices (STEP 2) - * + * * Morton's list of indices to send their data (mutipoles/particles) to proc p - * @tparam VectorOfVectorType + * @tparam vector_vector_type * @param[in] nb_proc number of mpi processes * @param[in] rank the mpi rank * @param[in] comm the communicator * @param[in] nb_messages_to_receive for each process the number of message to receive * @param[in] nb_messages_to_send for each process the number of message to send * @param[in] morton_to_receive for each process the vector of Morton indexes to receive - * @param[out] for each process the vector of Morton indexes to send + * @param[out] morton_to_send for each process the vector of Morton indexes to send */ - template<typename VectorOfVectorType> + template<typename vector_vector_type> inline void start_step2(int const& nb_proc, int const& rank, comm_type& comm, std::vector<int>& nb_messages_to_receive, std::vector<int>& nb_messages_to_send, - VectorOfVectorType& morton_to_receive, VectorOfVectorType&) + vector_vector_type& morton_to_receive, vector_vector_type& morton_to_send, + bool verbose = false) { - using mortonIdx_type = typename VectorOfVectorType::value_type::value_type; + using mortonIdx_type = typename vector_vector_type::value_type::value_type; std::vector<cpp_tools::parallel_manager::mpi::request> tab_mpi_status; // auto mpi_morton_type = cpp_tools::parallel_manager::mpi::get_datatype<mortonIdx_type>(); + // io::print(" step2 nb_messages_to_send ", nb_messages_to_send); for(auto p = 0; p < nb_proc; ++p) { - if(p == rank) - { - continue; - } - // send the morton indexes morton_to_receive - if(nb_messages_to_send[p] != 0) + // if(p == rank) 
+ // { + // continue; + // // } + // std::cout << "proc p " << p << "nb_messages_to_send " << nb_messages_to_send[p] << std::endl; + // std::cout << "proc p " << p << "nb_messages_to_send " << nb_messages_to_receive[p] << std::endl; + + // send the morton indexes morton_to_receive + if(nb_messages_to_receive[p] != 0) { - [p].resize(nb_messages_to_send[p]); + // io::print(" send to " + std::to_string(p) + " morton ", morton_to_receive[p]); - // std::cout << "step 2 me " << rank << " send to " << p << " nb morton= " << nb_messages_to_receive[p] - // << std::endl; - // io::print("step 2 morton_to_receive[" + std::to_string(p) + "] ", morton_to_receive[p]); + // io::print("step 2 morton_to_receive[" + std::to_string(p) + "] ", morton_to_receive[p]); comm.isend(morton_to_receive[p].data(), nb_messages_to_receive[p], mpi_morton_type, p, 600); } - if(nb_messages_to_receive[p] != 0) + } + // std::cout << std::endl << std::endl; + // std::cout << " step2 Start receive communications \n"; + + for(auto p = 0; p < nb_proc; ++p) + { + // std::cout << "proc p " << p << " nb_messages_to_receive " << nb_messages_to_receive[p] << std::endl; + // std::cout << "proc p " << p << " nb_messages_to_send " << nb_messages_to_send[p] << std::endl; + morton_to_send[p].resize(nb_messages_to_send[p], -1); + // io::print("start_step2 init morton_to_send ", morton_to_send[p]); + // Get the morton index to send + if(nb_messages_to_send[p] != 0) { - // std::cout << "step 2 me " << rank << " receive to " << p << " size= " << nb_messages_to_send[p] - // << std::endl; + // std::cout << "step 2 me " << rank << " receive to " << p + // << " nb_messages_to_receive= " << nb_messages_to_receive[p] + // << " nb_messages_to_send= " << nb_messages_to_send[p] << std::endl + // << std::flush; - tab_mpi_status.push_back(comm.irecv([p].data(), nb_messages_to_send[p], mpi_morton_type, p, 600)); + tab_mpi_status.push_back( + comm.irecv(morton_to_send[p].data(), nb_messages_to_send[p], mpi_morton_type, p, 600)); } } if(tab_mpi_status.size() > 0) @@ -168,36 +253,37 @@ namespace scalfmm::parallel::comm cpp_tools::parallel_manager::mpi::request::waitall(tab_mpi_status.size(), tab_mpi_status.data()); } // // check - /* - for(auto p = 0; p < nb_proc; ++p) - { - io::print("step 2 [" + std::to_string(p) + "] ", morton_to_send[p]); - } - */ + // for(auto p = 0; p < nb_proc; ++p) + // { + // io::print("step 2 morton_to_send[" + std::to_string(p) + "] ", morton_to_send[p]); + // } } - /** - * @brief For the vector of Morton indices to be sent to processor p, we construct a direct access to the component - * - * @tparam IteratorType - * @tparam VectorOfVectorStructType - * @tparam VectorOfVectorType + * @brief For the vector of Morton indices to be sent to processor p, we construct a direct access to the component (cell or leaf) + * + * @tparam iterator_type + * @tparam vector_vector_struct_type + * @tparam vector_vector_type * @param nb_proc the number of processors * @param begin_grp the first iterator on the group * @param end_grp the last iterator on the group * @param component_access the access to the component (iterator on group, position into the group) - * @param for each processor the vector of Morton indexes to send + * @param morton_to_send for each processor the vector of Morton indexes to send */ - template<typename IteratorType, typename VectorOfVectorStructType, typename VectorOfVectorType> - auto build_direct_access_to_leaf(const int nb_proc, IteratorType begin_grp, IteratorType end_grp, - VectorOfVectorStructType& component_access, 
VectorOfVectorType const&) -> void + template<typename iterator_type, typename vector_vector_struct_type, typename vector_vector_type> + auto build_direct_access_to_components(const int nb_proc, iterator_type begin_grp, iterator_type end_grp, + vector_vector_struct_type& component_access, + vector_vector_type const& morton_to_send) -> void { - using access_type = typename VectorOfVectorStructType::value_type; - using vector_morton_type = typename VectorOfVectorType::value_type; - auto build_index_grp = [](auto begin_grp, auto end_grp, vector_morton_type const& _p, access_type& to_send_p) + using access_type = typename vector_vector_struct_type::value_type; + using vector_morton_type = typename vector_vector_type::value_type; + + bool verbose{false}; + auto build_index_grp = + [&verbose](auto begin_grp, auto end_grp, vector_morton_type const& morton_to_send_p, access_type& to_send_p) { int idx{0}; - int max_idx = _p.size(); + int max_idx = morton_to_send_p.size(); to_send_p.resize(max_idx); // loop on the groups // auto it = std::begin(buffer); @@ -207,40 +293,71 @@ namespace scalfmm::parallel::comm int start_grp{0}; auto const& csymb = (*grp_ptr)->csymbolics(); // iterate on the cells - while(idx < max_idx and math::between(_p[idx], csymb.starting_index, csymb.ending_index)) + // if(verbose) + // std::clog << idx << " morton "<< morton_to_send_p[idx] << " in [ "<<csymb.starting_index<< ", " << csymb.ending_index << "[\n"; + while(idx < max_idx and math::between(morton_to_send_p[idx], csymb.starting_index, csymb.ending_index)) { // find cell inside the group int pos{-1}; for(int i = start_grp; i < (*grp_ptr)->size(); ++i) { auto morton = (*grp_ptr)->component(i).csymbolics().morton_index; - if(_p[idx] == morton) + if(morton_to_send_p[idx] == morton) { pos = i; start_grp = i + 1; to_send_p[idx].first = grp_ptr; to_send_p[idx].second = i; + // if (verbose) + // std::clog << " m= "<<morton << " ptr " << to_send_p[idx].first->get() << " pos " << i << std::endl; break; } } ++idx; } } + if(idx != max_idx) + { + std::cerr << "Didn't found the good number of morton indexes\n"; + // std::string outName2("bug_direct_access_to_component_rank_" + std::to_string(rank) + ".txt"); + // std::ofstream out(outName2); + // scalfmm::io::trace(out, letGroupTree, 2); + // out << "\n" << " \n"; + io::print(std::cerr, "morton_to_send: ", morton_to_send_p); + std::cerr << "\n missing morton: "; + // out << "missing morton: "; + + for(int i = idx; i < max_idx; ++i) + { + std::cerr << morton_to_send_p[i] << " "; + // out << morton_to_send_p[i] << " "; + } + std::cerr << "\n"; + // out.close(); + throw std::runtime_error(" Missing morton index in building direct compononent access"); + } }; for(auto p = 0; p < nb_proc; ++p) { - // io::print(" [" + std::to_string(p) + "] ", morton_to_send[p]); - - if([p].size() != 0) + // io::print(std::clog, " morton_to_send[" + std::to_string(p) + "] ", morton_to_send[p]); + if(morton_to_send[p].size() != 0) { - build_index_grp(begin_grp, end_grp, [p], component_access[p]); - auto const& elt = component_access[p]; - // for(auto i = 0; i < elt.size(); ++i) + // verbose = p == 3 ? 
true : false; + build_index_grp(begin_grp, end_grp, morton_to_send[p], component_access[p]); + // if(p == 3) // { - // std::cout << " " << p << " " - // << (*(elt[i].first))->component(elt[i].second).csymbolics().morton_index << " " - // << elt[i].second << " " << [p][i] << " nb part " - // << (*(elt[i].first))->component(elt[i].second).size() << std::endl; + // auto const& elt = component_access[p]; + // for(auto i = 0; i < elt.size(); ++i) + // { + // std::clog + // << " -->p=" << p << " ptr=" + // << elt[i].first->get() + // // << " m=" << (*(elt[i].first))->component(elt[i].second).csymbolics().morton_index + // << " pos=" << elt[i].second << " m=" + // << morton_to_send[p][i] + // // << " nb part " << (*(elt[i].first))->component(elt[i].second).size() + // << std::endl; + // } // } } } @@ -248,22 +365,19 @@ namespace scalfmm::parallel::comm /** * @brief Construct the MPI type of the particle according to leaf_to_access - * - * @tparam dimension - * @tparam VectorOfVectorStructType + * + * @tparam dimension + * @tparam vector_vector_struct_type * @param leaf_to_access For each processor the leaf to access (for receiving or sending) - * @param nb_inputs * @param mpi_position_type the MPI type of the coordinate of the points of the particles * @param mpi_input_type the MPI type of the inputs of the particles - * @return std::vector<MPI_Datatype> + * @return std::vector<MPI_Datatype> */ - template<std::size_t Dimension, typename VectorOfVectorStructType> - inline auto build_mpi_particles_type(VectorOfVectorStructType const& leaf_to_access, int const nb_inputs, + template<std::size_t dimension, typename vector_vector_struct_type> + auto inline build_mpi_particles_type(vector_vector_struct_type const& leaf_to_access, int const nb_inputs, MPI_Datatype mpi_position_type, MPI_Datatype mpi_input_type) -> std::vector<MPI_Datatype> { - static constexpr std::size_t dimension = Dimension; - const int nb_proc{int(leaf_to_access.size())}; std::vector<MPI_Datatype> newtype(nb_proc); @@ -271,6 +385,7 @@ namespace scalfmm::parallel::comm { if(leaf_to_access[p].size() != 0) { + // leaf_to_access[p] = std::vector<pair> [i] = (group_ptr, index_in_group) auto const& elt = leaf_to_access[p]; int nb_mpi_types{int(elt.size() * (dimension + nb_inputs))}; std::vector<int> length(nb_mpi_types, 1); @@ -308,11 +423,147 @@ namespace scalfmm::parallel::comm type[i + stride * nb_elt] = type[i + dimension * nb_elt]; MPI_Get_address(&(leaf[0].inputs(k)), &disp[i + stride * nb_elt]); } + // std::cout << p << " " << leaf.csymbolics().morton_index << " nb part " << leaf.size() << " *ptr_x " + // << proxy_position << " snd part " << *(ptr_x + 1) << " inputs0: " << leaf[0].inputs()[0] + // << " inputs1: " << *(&(leaf[0].inputs()[0]) + 1) << " ptr " << *(ptr_inputs_0 + 1) + // << std::endl; } // end loop on leaf_view + // std::cout << " create type " << std::endl; + // io::print(" " + std::to_string(p) + " disp", disp); MPI_Type_create_struct(nb_mpi_types, length.data(), disp.data(), type.data(), &newtype[p]); MPI_Type_commit(&newtype[p]); + // std::cout << " send to " << p << " size " << size_msg << std::endl; } } return newtype; } + /// @brief Construct the MPI type of all multipoles to send to a different process + /// @tparam vector_vector_struct_type + /// @param cell_to_access + /// @param nb_inputs + /// @param mpi_multipole_type + /// @return A vector of MPI type + template<typename vector_vector_struct_type> + auto inline build_mpi_multipoles_type(vector_vector_struct_type const& cell_to_access, int const nb_inputs, 
+ MPI_Datatype& mpi_multipole_type) -> std::vector<MPI_Datatype> + { + // std::cout << " build_mpi_multipoles_type inside nb_inputs" << nb_inputs << std::endl << std::flush; + + const int nb_proc{int(cell_to_access.size())}; + std::vector<MPI_Datatype> newtype(nb_proc, MPI_DATATYPE_NULL); + + for(auto p = 0; p < nb_proc; ++p) + { + // std::clog << " multipole type(p=" << p << ") nb cells to pack " << cell_to_access[p].size() << "\n"; + // std::clog << std::flush; + if(cell_to_access[p].size() != 0) + { + auto const& elt = cell_to_access[p]; + // number of mpi type to construct (=cells number * nb_inputs) + int nb_mpi_types{int(elt.size() * nb_inputs)}; + // + std::vector<int> length(nb_mpi_types, 1); + std::vector<MPI_Aint> disp(nb_mpi_types); + std::vector<MPI_Datatype> type(nb_mpi_types); + // if(p == 3) + // { + // //bug + // for(auto i = 0; i < elt.size(); ++i) + // { + // std::clog << " ptr: " << *(elt[i].first) << " pos " << elt[i].second << std::endl; + // } + // std::clog << "------\n"; + // } + int size_msg{0}; + for(auto i = 0; i < elt.size(); ++i) + { + // *(elt[i].first) = ptr_group + // elt[i].second = index inside group + int jump{0}; + auto const& cell = (*(elt[i].first))->component(elt[i].second); + // tuple of iterators + // cell[0] return a particle proxy on the first particle + auto const& m = cell.transfer_multipoles(); + auto nb_m = m.size(); // get number of multipoles = nb_inputs + // std::cout << " nb_m" << m.size() << std::endl << std::flush; + for(std::size_t k{0}; k < nb_m; ++k) + { + auto const& ten = m.at(k); + // std::cout << " size " << int(ten.size()) << std::endl << std::flush; + MPI_Type_contiguous(int(ten.size()), mpi_multipole_type, &type[i * nb_inputs + k]); + // MPI_Get_address(&(ten.data()[0]), &disp[i * nb_inputs + k]); + MPI_Get_address(ten.data(), &disp[i * nb_inputs + k]); + // std::cout << " i * nb_inputs + k " << i * nb_inputs + k << " nb_mpi_types " << nb_mpi_types + // << std::endl + // << std::flush; + } + } // end loop on cell_view + // io::print(std::clog, "m2l(type) disp: ", disp); std::clog << std::flush; + // io::print(std::clog, "m2l(type) length: ", length); std::clog << std::flush; + + MPI_Type_create_struct(nb_mpi_types, length.data(), disp.data(), type.data(), &newtype[p]); + MPI_Type_commit(&newtype[p]); + // std::clog << std::flush; + } + } + return newtype; + } + template<typename vector_vector_struct_type> + auto inline build_mpi_multipoles_type2(vector_vector_struct_type const& cell_to_access, int const nb_inputs, + MPI_Datatype mpi_multipole_type) -> std::vector<MPI_Datatype> + { + const int nb_proc{int(cell_to_access.size())}; + std::vector<MPI_Datatype> newtype(nb_proc, MPI_DATATYPE_NULL); + + for(auto p = 0; p < nb_proc; ++p) + { + if(cell_to_access[p].size() != 0) + { + auto const& elt = cell_to_access[p]; + // number of mpi type to construct (=cells number * nb_inputs) + int nb_mpi_types{int(elt.size() * nb_inputs)}; + // + std::vector<int> length(nb_mpi_types); + std::vector<MPI_Aint> disp(nb_mpi_types); + std::vector<MPI_Datatype> type(nb_mpi_types, mpi_multipole_type); + + int size_msg{0}; + for(auto i = 0; i < elt.size(); ++i) + { + // *(elt[i].first) = ptr_group + // elt[i].second = index inside group + int jump{0}; + auto const& cell = (*(elt[i].first))->component(elt[i].second); + // + auto const& m = cell.transfer_multipoles(); + auto nb_m = m.size(); // get number of mutilpoles = nb_inputs + // std::cout << " nb_m" << m.size() <<std::endl; + for(std::size_t k{0}; k < nb_m; ++k) + { + auto const& ten = m.at(k); + 
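                        // each multipole tensor contributes one block to the derived datatype:
                        // its length in elements and the absolute address of its first entry,
                        // later combined by MPI_Type_create_struct into a single type per rank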
// std::cout << " size " << int(ten.size()) << std::endl; + // MPI_Type_contiguous(int(ten.size()), mpi_multipole_type, &type[i * nb_inputs + k]); + length[i * nb_inputs + k] = ten.size(); + MPI_Get_address(&(ten.data()[0]), &disp[i]); + } + // std::cout << p << " " << cell.csymbolics().morton_index << " nb part " << cell.size() << " *ptr_x " + // << proxy_position << " snd part " << *(ptr_x + 1) << " inputs0: " << cell[0].inputs()[0] + // << " inputs1: " << *(&(cell[0].inputs()[0]) + 1) << " ptr " << *(ptr_inputs_0 + 1) + // << std::endl; + } // end loop on cell_view + // std::cout << " create type " << std::endl; + // io::print(" " + std::to_string(p) + " disp", disp); + MPI_Type_create_struct(nb_mpi_types, length.data(), disp.data(), type.data(), &newtype[p]); + MPI_Type_commit(&newtype[p]); + // std::cout << " send to " << p << " size " << size_msg << std::endl; + } + } + return newtype; + } + + template<typename Tree> + void prepare_comm_up(Tree tree) { + + }; + } // namespace scalfmm::parallel::comm diff --git a/include/scalfmm/parallel/mpi/utils.hpp b/include/scalfmm/parallel/mpi/utils.hpp index caf7833ffe8f46aba293b4265c7794c74f18514a..81a810f655883ab31a63dcd9e641d823c490dfce 100644 --- a/include/scalfmm/parallel/mpi/utils.hpp +++ b/include/scalfmm/parallel/mpi/utils.hpp @@ -1,10 +1,11 @@ -// -------------------------------- -// See LICENCE file at project root -// File : scalfmm/parallel/mpi/utils.hpp -// -------------------------------- #ifndef _PARALLEL_MPI_UTILS_HPP_ #define _PARALLEL_MPI_UTILS_HPP_ +#include <algorithm> +#include <fstream> +#include <iostream> +#include <vector> + #include <cpp_tools/colors/colorized.hpp> #include <cpp_tools/parallel_manager/parallel_manager.hpp> @@ -15,25 +16,19 @@ #include <inria/algorithm/distributed/mpi.hpp> #include <inria/linear_tree/balance_tree.hpp> #endif - -#include <algorithm> -#include <fstream> -#include <iostream> -#include <vector> - namespace scalfmm::parallel::utils { /** * @brief print the distribution of components (cells/leaves) in a stream * - * @tparam VectorType + * @tparam Vector * @param out the stream * @param header the header to write * @param distrib */ - template<typename VectorType> - inline auto print_distrib(std::ostream& out, std::string const& header, VectorType const& distrib) -> void + template<typename Vector> + auto inline print_distrib(std::ostream& out, std::string const& header, Vector const& distrib) -> void { out << header; for(auto p: distrib) @@ -42,22 +37,20 @@ namespace scalfmm::parallel::utils } out << std::endl; } - /** * @brief print the distribution of components (cells/leaves) * - * @tparam VectorType + * @tparam Vector * @param header the header to write * @param rank the process id * @param distrib the vector of distribution */ - template<typename VectorType> - inline auto print_distrib(std::string const& header, int rank, VectorType const& distrib) -> void + template<typename Vector> + auto inline print_distrib(std::string const& header, int rank, Vector const& distrib) -> void { std::string new_header("rank(" + std::to_string(rank) + ") " + header); print_distrib(std::cout, new_header, distrib); } - /** * @brief construct the morton indexes at the parent level * @@ -69,31 +62,30 @@ namespace scalfmm::parallel::utils * @tparam VectorMortonIdx the type of the vector of Morton index * @param[inout] leafMortonIdx the vector of Morton index */ - template<int Dimension, typename VectorMortonIdx> - inline auto move_index_to_upper_level(VectorMortonIdx& leafMortonIdx) -> void + template<int 
dimension, typename VectorMortonIdx> + auto inline move_index_to_upper_level(VectorMortonIdx& leafMortonIdx) -> void { // Move leafMortonIdx to level level_shared for(auto& p: leafMortonIdx) { - p = p >> Dimension; + p = p >> dimension; } auto last = std::unique(leafMortonIdx.begin(), leafMortonIdx.end()); leafMortonIdx.erase(last, leafMortonIdx.end()); } - /** - * @brief send_get_min_morton_idx send Morton index to the left and get value from the right - * - * @tparam IndexType - * @param para - * @param[in] morton_idx the Morton index to send o send to processor p-1 - * @return the Morton index coming from the right - */ - template<typename IndexType> - [[nodiscard]] IndexType send_get_min_morton_idx(cpp_tools::parallel_manager::parallel_manager& para, - IndexType& morton_idx) + /// + /// \brief send_get_min_morton_idx send Morton index to the left and get value from the right + /// + /// \param[in] conf the mpi conf + /// \param[in] morton_idx the Morton index to send o send to processor p-1 + /// \return the Morton index coming from the right + /// + template<typename index_type> + [[nodiscard]] index_type send_get_min_morton_idx(cpp_tools::parallel_manager::parallel_manager& para, + index_type& morton_idx) { // Setting parameter - IndexType buff_recev{0}; + index_type buff_recev{0}; #ifdef SCALFMM_USE_MPI auto comm = para.get_communicator(); int nb_proc = comm.size(); @@ -102,7 +94,7 @@ namespace scalfmm::parallel::utils if(nb_proc != 1) { cpp_tools::parallel_manager::mpi::request tab_mpi_status; - auto mpi_type = cpp_tools::parallel_manager::mpi::get_datatype<IndexType>(); + auto mpi_type = cpp_tools::parallel_manager::mpi::get_datatype<index_type>(); const int sender = (my_rank + 1 == nb_proc) ? MPI_PROC_NULL : my_rank + 1; const int receiver = (my_rank == 0) ? 
MPI_PROC_NULL : my_rank - 1; @@ -122,24 +114,24 @@ namespace scalfmm::parallel::utils } #ifdef SCALFMM_USE_MPI - /** - * @brief exchange_data_left_right to exchange data left and right between processor left and right - * - * The processor p send data_left to processor p-1 and receive from it data_right and - * p send data_right to processor p+1 and receive from it data_left - * - * @tparam DataType - * @param[in] conf - * @param[in] data_left data to send to processor left - * @param[in] data_right data to send to processor right - * @return a tuple containing the value_right of processor on the left and the value left coming from processor right - */ - template<typename DataType> - auto exchange_data_left_right(cpp_tools::parallel_manager::mpi_config& conf, DataType& data_left, - DataType& data_right) + /// + /// \brief exchange_data_left_right to exchange data left and right between processor left and right + /// + /// The processor p send data_left to processor p-1 and receive from it data_right and + /// p send data_right to processor p+1 and receive from it data_left + /// \param[in] conf + /// \param[in] data_left data to send to processor left + /// \param[in] data_right data to send to processor right + /// + /// \return a tuple containing the value_right of processor on the left and the + /// value left coming from processor right + /// + template<typename data_type> + auto exchange_data_left_right(cpp_tools::parallel_manager::mpi_config& conf, data_type& data_left, + data_type& data_right) { // Setting parameter - DataType buff_p{0}, buff_n{0}; + data_type buff_p{0}, buff_n{0}; auto comm = conf.comm; int nb_proc = comm.size(); int my_rank = comm.rank(); @@ -148,7 +140,7 @@ namespace scalfmm::parallel::utils { // First exchange to the left cpp_tools::parallel_manager::mpi::request tab_mpi_status[2]; - // auto mpi_type = cpp_tools::parallel_manager::mpi::get_datatype<IndexType>(); + // auto mpi_type = cpp_tools::parallel_manager::mpi::get_datatype<index_type>(); // if i'm not the last proc const int right = (my_rank + 1 == nb_proc) ? MPI_PROC_NULL : my_rank + 1; const int left = (my_rank == 0) ? MPI_PROC_NULL : my_rank - 1; @@ -170,28 +162,33 @@ namespace scalfmm::parallel::utils } #endif - /** - * @brief Distribute uniformly on the processes the leaves. - * - * Split in interval (semi-open) the leaves - * The algorithm is - * 1) we distribute the particles according to their Morton index. The - * the leaves are split on the processor by their Morton index - * - * 2) balanced the Morton index by some criteria to define - * - * @tparam MortonArrayType - * @param manager - * @param mortonArray - * @return auto - */ - template<typename MortonArrayType> - auto balanced_leaves(cpp_tools::parallel_manager::parallel_manager& manager, MortonArrayType& mortonArray) + /// + /// \brief Distribute uniformly on the processes the leaves. + /// + /// Split in interval (semi-open) the leaves + /// The algorithm is + /// 1) we distribute the particles according to their Morton index. 
The + /// the leaves are split on the processor by their Morton index + /// + /// 2) balanced the Morton index by some criteria to define + /// + /// parameter[inout] mortonArray Morton index located on the processor + /// parameter[out] the distribution of leaves + + template<typename MortonArray_type> + auto balanced_leaves(cpp_tools::parallel_manager::parallel_manager& manager, MortonArray_type& mortonArray) { - using morton_type = typename MortonArrayType::value_type; + // std::cout << cpp_tools::colors::green << " --> Begin distrib::balanced_leaves " << cpp_tools::colors::reset + // << std::endl; + // + using morton_type = typename MortonArray_type::value_type; auto rank = manager.get_communicator().rank(); - + // io::print("rank(" + std::to_string(rank) + ") leafMortonIdx: ", mortonArray); + // auto last = std::unique(mortonArray.begin(), mortonArray.end()); + // mortonArray.erase(last, mortonArray.end()); + // io::print("rank(" + std::to_string(rank) + ") leafMortonIdx U: ", mortonArray); + // // get max and min of the Morton index owned by current process // [min, max] On two consecutive processes we may have max[p] = min[p+1] // we remove such case @@ -215,7 +212,7 @@ namespace scalfmm::parallel::utils /// Construct a uniform distribution of the Morton index /// - MortonArrayType morton_distrib; + MortonArray_type morton_distrib; try { inria::mpi_config conf_tmp(manager.get_communicator().raw_comm); @@ -226,40 +223,54 @@ namespace scalfmm::parallel::utils { std::cerr << e.what() << '\n'; } + // print("rank(" + std::to_string(rank) + "morton_distrib ", morton_distrib); + // manager.comm.barrier(); + + // std::cout << "rank(" + std::to_string(rank) + ") Morton distrib [" << morton_distrib[0] << ", + // " + // << morton_distrib[morton_distrib.size() - 1] << "]\n"; + + //print("rank(" + std::to_string(rank) + ") Distrib cells Index: ", morton_distrib); cell_distrib.resize(manager.get_num_processes(), {0, 0}); std::array<morton_type, 2> local{morton_distrib[0], morton_distrib[morton_distrib.size() - 1]}; cell_distrib[0] = local; + // print("rank(" + std::to_string(rank) + ") local: ", local); /// share the distribution on all processors manager.get_communicator().allgather(local.data(), sizeof(local), MPI_CHAR, cell_distrib.data(), sizeof(local), MPI_CHAR /*, 0*/); #endif + // std::cout << cpp_tools::colors::red; + // io::print("rank(" + std::to_string(rank) + ") cell_distrib: ", cell_distrib); + + // std::cout << cpp_tools::colors::green << " --> End distrib::balanced_leaves " << cpp_tools::colors::reset + // << std::endl; return cell_distrib; } - - /** - * @brief balanced_particles compute a balanced particle distribution - * - * 1) we distribute the particles according to their Morton index. The - * the leaves - * are split on the processor by their Morton index - * 2) balanced the Morton index by some criteria to define - * - * @tparam ParticleArrayType - * @tparam MortonArrayType - * @param manager the parallel manager. - * @param partArray the vector of particles own by the processor. - * @param morton_array the morton index located on the processor. - * @param number_of_particles the total number of particles on all processes. - * @return the distribution in terms of Morton index. 
- */ - template<typename ParticleArrayType, typename MortonArrayType> - auto balanced_particles(cpp_tools::parallel_manager::parallel_manager& manager, ParticleArrayType& partArray, - const MortonArrayType& morton_array, const std::size_t& number_of_particles) + /// + /// \brief balanced_particles compute a balanced particle distribution + /// + /// 1) we distribute the particles according to their Morton index. The + /// the leaves + /// are split on the processor by their Morton index + /// 2) balanced the Morton index by some criteria to define + /// + /// input[in] tha parallel manager + /// input[in] partArray the vector of particles own by the processor + /// input[inout] mortonArray the morton index located on the processor + /// input[in] number_of_particles the total number of particles on all processes + /// + /// \return the distribution in terms of Morton index (std::vector<std::array<Morton_type,2>>) + template<typename ParticleArray_type, typename MortonArray_type> + auto balanced_particles(cpp_tools::parallel_manager::parallel_manager& manager, ParticleArray_type& partArray, + const MortonArray_type& morton_array, const std::size_t& number_of_particles) { - using Morton_type = typename MortonArrayType::value_type; - using MortonDistribType = std::array<int, 2>; + std::cout << cpp_tools::colors::green << " --> Begin distrib::balanced_particles " << cpp_tools::colors::reset + << std::endl; + + using Morton_type = typename MortonArray_type::value_type; + using MortonDistrib_type = std::array<int, 2>; auto rank = manager.get_process_id(); auto nb_proc = manager.get_num_processes(); @@ -270,21 +281,31 @@ namespace scalfmm::parallel::utils auto LeafMortonIndex(morton_array); auto last = std::unique(LeafMortonIndex.begin(), LeafMortonIndex.end()); LeafMortonIndex.erase(last, LeafMortonIndex.end()); + io::print(" LeafMortonIndex ", LeafMortonIndex); /// LeafMortonIndex has the size of the number of leaves /// weight = ({Morton index, number of particles}) for each leaf - std::vector<MortonDistribType> weight(LeafMortonIndex.size(), {bad_index, 0}); + std::vector<MortonDistrib_type> weight(LeafMortonIndex.size(), {bad_index, 0}); std::size_t pos = 0; weight[pos][0] = LeafMortonIndex[pos]; + // std::cout << cpp_tools::colors::red << "leaf size: " << LeafMortonIndex.size() << std::endl; { // loop on the number of particles for(std::size_t part = 0; part < morton_array.size(); ++part) { + // std::cout << "part " << part << " " << + // tmp[part] << " pos " << pos << " " << + // leafMortonIdx[pos] + // << " " << weight[pos] << + // std::endl; while(morton_array[part] != LeafMortonIndex[pos]) { + // std::cout << " new pos " << pos << + // std::endl; pos++; } weight[pos][1] += 1; weight[pos][0] = LeafMortonIndex[pos]; } + io::print("rank(" + std::to_string(rank) + ") weight: ", weight); } // get max and min of the Morton index owned by current process @@ -297,14 +318,17 @@ namespace scalfmm::parallel::utils if(nb_proc == 1) { morton_distrib[0] = {minIndex[0], maxIndex[0]}; - // return morton_distrib; + // parallel::utils::print_distrib(std::cout, "distrib proc=1", morton_distrib); + + return morton_distrib; } + io::print("rank(" + std::to_string(rank) + ") weight initial: ", weight); #ifdef SCALFMM_USE_MPI cpp_tools::parallel_manager::mpi_config conf(manager.get_communicator()); - MortonDistribType weight_prev, weight_next; + MortonDistrib_type weight_prev, weight_next; std::tie(weight_prev, weight_next) = exchange_data_left_right(conf, minIndex, maxIndex); if(maxIndex[0] == weight_next[0]) 
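The weight computation in `balanced_particles` is essentially a run-length encoding of the sorted per-particle Morton array: one `{morton index, particle count}` pair per distinct leaf. A self-contained sketch of that step, with illustrative names that are not the library API:

```cpp
#include <array>
#include <cstdint>
#include <vector>

// Run-length encode a sorted per-particle Morton array into per-leaf weights,
// i.e. one {morton index, particle count} pair per distinct leaf, which is the
// role of the weight vector built above.
using morton_t = std::int64_t;

std::vector<std::array<morton_t, 2>> leaf_weights(std::vector<morton_t> const& particle_mortons)
{
    std::vector<std::array<morton_t, 2>> weight;
    for(auto m: particle_mortons)
    {
        if(weight.empty() || weight.back()[0] != m)
        {
            weight.push_back({m, 0});   // a new leaf starts here
        }
        ++weight.back()[1];             // one more particle in the current leaf
    }
    return weight;
}
```

Since the first or last leaf may be shared with a neighbouring rank, the boundary weights are then exchanged with `exchange_data_left_right` and merged, which is what the condition interrupted by the next hunk checks.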
@@ -316,6 +340,7 @@ namespace scalfmm::parallel::utils weight[0][1] += weight_prev[1]; } + // io::print("rank(" + std::to_string(rank) + ") weight final: ", weight); /// /// compute the number of particles in the leaves int nb_part = 0; @@ -332,15 +357,20 @@ namespace scalfmm::parallel::utils { block = number_of_particles - rank * block; } + std::cout << "rank(" << rank << ") N particles: " << nb_part << " block " << block << std::endl; std::array<Morton_type, 3> local{weight[0][0], weight[weight.size() - 1][0], nb_part}; std::vector<std::array<Morton_type, 3>> part_distrib(nb_proc); part_distrib[0] = local; + // io::print("rank(" + std::to_string(rank) + ") 0 Distrib cells Index: ", part_distrib); + // std::cout << "rank(" << rank << ") local: " <<local[0]<<" " <<local[1]<<" " <<local[2] <<std::endl; + /// share the distribution on all processors auto nb_elt = sizeof(local); conf.comm.allgather(local.data(), nb_elt, MPI_CHAR, part_distrib[0].data(), nb_elt, MPI_CHAR /*, 0*/); + // io::print("rank(" + std::to_string(rank) + ") Distrib cells Index: ", part_distrib); /// /// Try to have the same number of particles on a processor /// @@ -356,7 +386,9 @@ namespace scalfmm::parallel::utils numberLeaves[i] = part_distrib[i][1] - part_distrib[i][0] + 1; maxLeaves = std::max(numberLeaves[i], maxLeaves); } + // io::print("rank(" + std::to_string(rank) + ") numberLeaves: ", numberLeaves); + // std::cout << "rank(" + std::to_string(rank) + ") initial tomove: " << maxLeaves << std::endl; /// Prevent to have 0 cell on a processor. if(maxLeaves > 1) { @@ -365,6 +397,8 @@ namespace scalfmm::parallel::utils tomove[i] = part_distrib[i][2] - block; } } + // io::print("rank(" + std::to_string(rank) + ") initial tomove: ", tomove); + // if(rank == 0) for(int i = 0; i < nb_proc - 1; ++i) { @@ -380,9 +414,27 @@ namespace scalfmm::parallel::utils tomove[i] = 0; tomove[i + 1] += tomove[i]; } + // print(" end (" + std::to_string(i) + ") + // tomove: ", tomove); print(" end (" + + // std::to_string(i) + ") tosendR: ", + // tosendR); print(" end (" + + // std::to_string(i) + ") tosendL: ", + // tosendL); } tosendR[nb_proc - 1] = 0; + // io::print("rank(" + std::to_string(rank) + ") tomove: ", tomove); + // io::print("rank(" + std::to_string(rank) + ") tosendR: ", tosendR); + // io::print("rank(" + std::to_string(rank) + ") tosendRL: ", tosendL); + /// + // std::cout << "tosendL(" + std::to_string(rank) + "): " << tosendL[rank] << std::endl; + // std::cout << "tosendR(" + std::to_string(rank) + "): " << tosendR[rank] << std::endl; + // if(rank > 0) + // std::cout << "toReceivL(" + std::to_string(rank) + "): " << tosendR[rank - 1] << + // std::endl; + // if(rank < nb_proc - 1) + // std::cout << "toReceivR(" + std::to_string(rank) + "): " << tosendL[rank + 1] << + // std::endl; int toReceivL, toReceivR; toReceivL = tosendR[rank - 1] > 0 ? 
1 : 0; @@ -392,48 +444,76 @@ namespace scalfmm::parallel::utils /// int nb_leaf_to_left{0}, nb_leaf_to_right{0}, nb_part_to_left{0}, nb_part_to_right{0}; Morton_type morton_to_left{0}, morton_to_right{0}; - MortonDistribType MortonPart_to_left{{0, 0}}, MortonPart_to_right{{0, 0}}; + MortonDistrib_type MortonPart_to_left{{0, 0}}, MortonPart_to_right{{0, 0}}; + // std::cout << rank << " Morton [ " << MortonPart_to_left << ", " << MortonPart_to_right << "]" << std::endl; if(tosendL[rank] > 0) { int leaf_idx = 0; nb_part_to_left = weight[leaf_idx][1]; + // std::cout << " tosendL leaf_idx " << leaf_idx << " " << nb_part_to_left << std::endl; while(nb_part_to_left <= tosendL[rank]) { leaf_idx++; nb_part_to_left += weight[leaf_idx][1]; + // std::cout << " tosendL new pos " << leaf_idx << " " << nb_part_to_left << std::endl; } - nb_leaf_to_left = leaf_idx + 1; + + // nb_leaf_to_left = leaf_idx + 1; + nb_leaf_to_left = leaf_idx; morton_to_left = weight[leaf_idx][0]; MortonPart_to_left = {weight[leaf_idx][0], nb_leaf_to_left}; // New starting Morton index for the local distribution - local[0] = weight[leaf_idx + 1][0]; + // local[0] = weight[leaf_idx + 1][0]; // Bug here ? + local[0] = weight[leaf_idx][0]; // Bug here ? + + // std::cout << rank << " local[0] " << local[0] << std::endl; + // std::cout << rank << " send morton_to_left" << morton_to_left << std::endl; } if(tosendR[rank] > 0) { int leaf_idx = weight.size() - 1; nb_part_to_right = weight[leaf_idx][1]; + // std::cout << "tosendR leaf_idx " << leaf_idx << " " << nb_part_to_right << std::endl; while(nb_part_to_right <= tosendL[rank]) { leaf_idx--; nb_part_to_right += weight[leaf_idx][1]; + // std::cout << " - tosendR new pos " << leaf_idx << " " << nb_part_to_right << + // std::endl; } nb_leaf_to_right = leaf_idx + 1; morton_to_right = weight[leaf_idx][0]; MortonPart_to_right = {weight[leaf_idx][0], nb_leaf_to_left}; // New starting Morton index for the local distribution local[1] = weight[leaf_idx][0]; + + // std::cout << rank << " send " << nb_leaf_to_right << " leaf to right - nb part " << nb_part_to_right + // << " " << MortonPart_to_right[0] << std::endl; + // std::cout << rank << "send morton_to_right " << morton_to_right << std::endl; } local[3] = 0; + // std::cout << rank << " local partition [ " << local[0] << ", " << local[1] << "]" << std::endl; /// Send the number /// send to left and right + // int nb_elt_from_left{0}, nb_elt_from_right{0}; + // Morton_type min_idx{part_distrib[rank][0]}, max_idx{part_distrib[rank][1]}; Morton_type morton_from_left{local[0]}, morton_from_right{local[1]}; /// receive from left right + // auto exchange_val = [&manager, &rank, + // &nb_proc, &tosendL, &tosendR, &toReceivL, + // &toReceivR](const auto& + // nb_part_to_left, const + // auto& nb_part_to_right, + // auto& + // nb_elt_from_left, + // auto& + // nb_elt_from_right) { // compute the buffer size @@ -468,38 +548,50 @@ namespace scalfmm::parallel::utils { cpp_tools::parallel_manager::mpi::request::waitall(toReceivL + toReceivR, tab_mpi_status); } + + // std::cout << rank << " Morton Left: " << morton_from_left << " Morton right: " << morton_from_right + // << std::endl; } + + // exchange_val(nb_part_to_left, nb_part_to_right, + // nb_elt_from_left, nb_elt_from_right); + + // std::array<Morton_type, 3> local{weight[0][0], weight[weight.size() - 1][0], nb_part}; + // io::print("rank(" + std::to_string(rank) + ") 00 Distrib cells Index: ", part_distrib); + std::array<Morton_type, 2> local1 = {std::max(morton_from_left, part_distrib[rank][0]), 
std::min(morton_from_right, part_distrib[rank][1])}; + // std::cout << rank << " final local 1 [ " << local1[0] << ", " << local1[1] << "]" << std::endl; + morton_distrib[0] = local1; + // print("rank(" + std::to_string(rank) + ") Distrib cells Index: ", part_distrib); + // std::cout << "rank(" << rank << ") Distrib Leaf Index: " << + // nb_part << std::endl; + /// share the distribution on all processors nb_elt = sizeof(local1); conf.comm.allgather(local1.data(), nb_elt, MPI_CHAR, morton_distrib[0].data(), nb_elt, MPI_CHAR /*, 0*/); + // io::print("rank(" + std::to_string(rank) + ") Morton distrib final: ", morton_distrib); #endif + std::cout << cpp_tools::colors::green << " --> End distrib::balanced_particles " << cpp_tools::colors::reset + << std::endl; return morton_distrib; } - /** - * @brief - * - * @tparam ParticleArrayType - * @tparam MortonArrayType - * @tparam MortonDistribType - * @param my_rank - * @param particles - * @param morton_array - * @param morton_dist - * @return auto - */ - template<typename ParticleArrayType, typename MortonArrayType, typename MortonDistribType> - auto compute_communications(int my_rank, ParticleArrayType& particles, const MortonArrayType& morton_array, - const MortonDistribType& morton_dist) + template<typename ParticlesArray_type, typename MortonArray_type, typename MortonDistrib_type> + auto compute_communications(int my_rank, ParticlesArray_type& particles, const MortonArray_type& morton_array, + const MortonDistrib_type& morton_dist) { - using Morton_type = typename MortonArrayType::value_type; + // std::cout << cpp_tools::colors::green << " --> Begin distrib::compute_communications " + // << cpp_tools::colors::reset << std::endl; + using Morton_type = typename MortonArray_type::value_type; + // auto between = [](const Morton_type& m, const Morton_type& mmin, const Morton_type& mmax) { + // return (mmin <= m) && (m <= mmax); + // }; std::vector<int> message(morton_dist.size()); std::vector<std::array<int, 2>> details_partL(morton_dist.size(), {0, 0}), details_partR(morton_dist.size(), {0, 0}); @@ -508,7 +600,11 @@ namespace scalfmm::parallel::utils /// Compute on the left int pos = 0; bool new_start = true; + // std::string beg("rank(" + std::to_string(my_rank) + ")"); + // if(my_rank == 2) { + // io::print(beg + " morton_dist: ", morton_dist); + for(std::size_t i = 0; i < particles.size(); ++i) { if(morton_array[i] >= mortonMin) @@ -531,14 +627,30 @@ namespace scalfmm::parallel::utils new_start = false; } details_partL[pos][1] += 1; + // std::cout << beg << i << " L m_i " << morton_array[i] << " min " << + // mortonMin << " rank " + // << morton_dist[pos] << " " << std::boolalpha + // << between(morton_array[i], morton_dist[pos][0], + // morton_dist[pos][1]) << " pos " << pos + // << std::endl; } } /// Compute on the right + // print("rank(" + std::to_string(my_rank) + ") message: ", message); + // print("rank(" + std::to_string(my_rank) + ") details_part: ", details_partL); { + // print(beg + " morton_dist (Right): ", morton_dist); pos = morton_dist.size() - 1; + // my_rank + 1; for(std::size_t i = particles.size() - 1; i > 0; --i) { + // std::cout << beg << i << " R m_i " << morton_array[i] << " max " << + // mortonMax << " rank " + // << morton_dist[pos] << " " << std::boolalpha + // << between(morton_array[i], morton_dist[pos][0], + // morton_dist[pos][1]) << " pos " << pos + // << std::endl; if(morton_array[i] <= mortonMax) { break; @@ -559,18 +671,23 @@ namespace scalfmm::parallel::utils new_start = false; } details_partR[pos][1] += 1; + 
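                // details_partR[pos] accumulates, for the right-hand owner pos, the number of
                // local particles whose Morton index lies beyond this rank's upper bound and
                // must therefore be shipped to that process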
// std::cout << beg << i << " R m_i " << morton_array[i] << " max " << + // mortonMax << " rank " + // << morton_dist[pos] << " " << std::boolalpha + // << between(morton_array[i], morton_dist[pos][0], + // morton_dist[pos][1]) << " pos " << pos + // << std::endl; } + // print("rank(" + std::to_string(my_rank) + ") message: ", message); + // print("rank(" + std::to_string(my_rank) + ") details_part R: ", details_partR); } + // std::cout << cpp_tools::colors::green << " --> End distrib::compute_communications " + // << cpp_tools::colors::reset << std::endl; return std::make_tuple(message, details_partL, details_partR); } - /** * @brief * - * @tparam ParticleArrayType - * @tparam MortonArrayType - * @tparam MortonDistribType - * @tparam BoxType * @param manager * @param particles * @param morton_array @@ -579,21 +696,30 @@ namespace scalfmm::parallel::utils * @param leaf_level * @param total_num_particles */ - template<typename ParticleArrayType, typename MortonArrayType, typename MortonDistribType, typename BoxType> - auto fit_particles_in_distrib(cpp_tools::parallel_manager::parallel_manager& manager, ParticleArrayType& particles, - const MortonArrayType& morton_array, const MortonDistribType& morton_dist, - const BoxType& box, const int& leaf_level, const int& total_num_particles) -> void + template<typename ParticlesArray_type, typename MortonArray_type, typename MortonDistrib_type, typename Box_type> + void fit_particles_in_distrib(cpp_tools::parallel_manager::parallel_manager& manager, + ParticlesArray_type& particles, const MortonArray_type& morton_array, + const MortonDistrib_type& morton_dist, const Box_type& box, const int& leaf_level, + const int& total_num_particles) { + // std::cout << cpp_tools::colors::green << " --> Begin distrib::fit_particles_in_distrib " + // << cpp_tools::colors::reset << std::endl; int my_rank = manager.get_process_id(); int nb_proc = manager.get_num_processes(); + // std::cout << " (" << my_rank << ") size " << particles.size() << " " + // << morton_array.size() << std::endl; #ifdef SCALFMM_USE_MPI auto comm = manager.get_communicator(); + // std::cout << "\n------------- fit_particles_in_distrib -------------" << std::endl; + // io::print("rank(" + std::to_string(my_rank) + ") morton_array: ", morton_array); // get the min and the max morton index of the particles own by the // process // send the number of communication we will receive + // auto mortonMin = morton_dist[my_rank][0]; + // auto mortonMax = morton_dist[my_rank][1]; auto to_comm = std::move(compute_communications(my_rank, particles, morton_array, morton_dist)); - + // std::cout << " (" << my_rank << ") " << std::get<0>(to_comm) << std::endl; // Send these numbers auto nb_message = std::get<0>(to_comm); auto nb_length_left = std::get<1>(to_comm); @@ -602,39 +728,63 @@ namespace scalfmm::parallel::utils comm.allreduce(nb_message.data(), message_to_receiv.data(), nb_proc, MPI_INT, MPI_SUM); + // print("rank(" + std::to_string(my_rank) + ") final message: ", message_to_receiv); + + // + // int nb_message_to_receiv = + // message_to_receiv[my_rank]; int buffer_size_left{0}, buffer_size_right{0}; int nb_left = my_rank > 0 ? nb_length_left[my_rank - 1][1] : 0; int nb_right = my_rank + 1 != nb_proc ? 
nb_length_right[my_rank + 1][1] : 0; cpp_tools::parallel_manager::mpi_config conf(comm); std::tie(buffer_size_left, buffer_size_right) = exchange_data_left_right(conf, nb_left, nb_right); - + // std::cout << "rank(" + std::to_string(my_rank) + ") nb_left: " << nb_left << std::endl; + // std::cout << "rank(" + std::to_string(my_rank) + ") nb_right: " << nb_right << std::endl; + // std::cout << "rank(" + std::to_string(my_rank) + ") buffer_size_left: " << + // buffer_size_left + // << std::endl; std::cout << "rank(" + std::to_string(my_rank) + ") buffer_size_right: " << + // buffer_size_right << std::endl; + /// /// Send the particles /// if nb_left >0 we send a communication on the left /// if nb_right >0 we send a communication on the right /// if buffer_size_left >0 we receive a communication on the left /// if buffer_size_right >0 we receive a communication on the right - using particle_type = typename ParticleArrayType::value_type; + using particle_type = typename ParticlesArray_type::value_type; particle_type *buffer_left{nullptr}, *buffer_right{nullptr}; const int to_right = (my_rank + 1 == nb_proc) ? MPI_PROC_NULL : my_rank + 1; const int to_left = (my_rank == 0) ? MPI_PROC_NULL : my_rank - 1; if(nb_left > 0) { + // std::cout << my_rank << " send first part to " << to_left << " nb val= " << + // nb_left << " first p " + // << particles[0] << std::endl; + conf.comm.isend(particles.data(), nb_left * sizeof(particle_type), MPI_CHAR, to_left, 100); } if(nb_right > 0) { int start = particles.size() - nb_right; + // std::cout << my_rank << " send last part to " << to_right << " nb val= " << + // nb_right + // << " first p " + // << particles[start] << std::endl; conf.comm.isend(&(particles[start]), nb_right * sizeof(particle_type), MPI_CHAR, to_right, 100); } /// int nb_commL{(buffer_size_left > 0) ? 1 : 0}, nb_commR{(buffer_size_right > 0) ? 
1 : 0}; std::vector<cpp_tools::parallel_manager::mpi::request> tab_mpi_status; + // buffer_right = new particle_type[buffer_size_right]; if(nb_commL > 0) { buffer_left = new particle_type[buffer_size_left]; + // std::cout << my_rank << " post a receiv on left " << to_left << " b " << + // buffer_left + // << " size " + // << buffer_size_left << std::endl; tab_mpi_status.push_back( conf.comm.irecv(buffer_left, buffer_size_left * sizeof(particle_type), MPI_CHAR, to_left, 100)); @@ -643,6 +793,10 @@ namespace scalfmm::parallel::utils { buffer_right = new particle_type[buffer_size_right]; + // std::cout << my_rank << " post a receiv on right " << to_right << " b " << + // buffer_right << " size " + // << buffer_size_right << " " << std::endl; + tab_mpi_status.push_back( conf.comm.irecv(buffer_right, buffer_size_right * sizeof(particle_type), MPI_CHAR, to_right, 100)); } @@ -650,25 +804,36 @@ namespace scalfmm::parallel::utils // Prepare the copy during the communications // int new_part_size = particles.size() - nb_left - nb_right + buffer_size_left + buffer_size_right; + // std::cout << my_rank << " old size " << particles.size() << " new size " << new_part_size << + // std::endl; - ParticleArrayType newArray(new_part_size); + ParticlesArray_type newArray(new_part_size); /// Here we copy in the right place the particles that do not move auto start = particles.begin() + nb_left /*std::advance(particles.begin(), nb_left)*/; auto end = particles.end() - nb_right /* std::advance(std::begin(particles), particles.size() - nb_right)*/; auto start_out = newArray.begin() + buffer_size_left /*std::advance(std::begin(newArray), buffer_size_left)*/; std::copy(start, end, start_out); + // conf.comm.barrier(); + // std::cout << my_rank << " status size " << tab_mpi_status.size() << std::endl; if(tab_mpi_status.size() > 0) { + // std::cout << my_rank << " I'm waiting " << tab_mpi_status.size() << " " << + // buffer_left << " " + // << buffer_right << std::endl; for(int j = 0; j < tab_mpi_status.size(); ++j) { cpp_tools::parallel_manager::mpi::status status; tab_mpi_status[j].get_status(status); + // std::cout << my_rank << " request " << j << " count " << + // status.get_count(MPI_CHAR) << " source " + // << status.source() << " tag " << status.tag() << std::endl; } cpp_tools::parallel_manager::mpi::request::waitall(tab_mpi_status.size(), tab_mpi_status.data()); } conf.comm.barrier(); + // std::cout << my_rank << " ---------- End Redistribution ----------" << std::endl; if(buffer_left) { /// Here we copy in the right place the particles that do not move @@ -703,21 +868,20 @@ namespace scalfmm::parallel::utils #else new_num_particles = particles.size(); #endif - } - /** - * @brief Build the cell distribution at one level upper - * - * @tparam VectorMortonIdx - * @tparam MortonDistribution - * @param[in] para the parallel manager - * @param[in] dimension dimension of the problem. - * @param[in] level current level to construct the cell distribution - * @param[inout] mortonCellIndex the index cell at level+1 (in) and we construct the parent cells (out). 
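The redistribution in `fit_particles_in_distrib` above boils down to a left/right neighbour exchange: each rank isends its first `nb_left` and last `nb_right` particles, receives the incoming slices into freshly allocated buffers, and then splices everything into the new array. A minimal plain-MPI sketch of that pattern (hypothetical `particle` struct and free function, not the scalfmm/cpp_tools API; the receive sizes are assumed to have been agreed on beforehand, as `exchange_data_left_right` does above):

```cpp
// Sketch only: boundary slices leave as raw bytes with tag 100 (as in the diff),
// and MPI_PROC_NULL lets the first/last ranks run the same code without special cases.
#include <mpi.h>

#include <vector>

struct particle
{
    double x, y, z, q;   // position + one input value, for illustration only
};

void exchange_boundaries(MPI_Comm comm, std::vector<particle>& particles, int nb_left, int nb_right,
                         std::vector<particle>& recv_left, std::vector<particle>& recv_right)
{
    int rank{0}, nb_proc{1};
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &nb_proc);
    const int to_left = (rank == 0) ? MPI_PROC_NULL : rank - 1;
    const int to_right = (rank + 1 == nb_proc) ? MPI_PROC_NULL : rank + 1;

    std::vector<MPI_Request> requests(4);
    // send the slices that leave this rank
    MPI_Isend(particles.data(), static_cast<int>(nb_left * sizeof(particle)), MPI_CHAR, to_left, 100, comm,
              &requests[0]);
    MPI_Isend(particles.data() + (particles.size() - nb_right), static_cast<int>(nb_right * sizeof(particle)),
              MPI_CHAR, to_right, 100, comm, &requests[1]);
    // receive the slices that enter this rank (sizes exchanged beforehand)
    MPI_Irecv(recv_left.data(), static_cast<int>(recv_left.size() * sizeof(particle)), MPI_CHAR, to_left, 100, comm,
              &requests[2]);
    MPI_Irecv(recv_right.data(), static_cast<int>(recv_right.size() * sizeof(particle)), MPI_CHAR, to_right, 100,
              comm, &requests[3]);

    MPI_Waitall(static_cast<int>(requests.size()), requests.data(), MPI_STATUSES_IGNORE);
    // the caller then splices recv_left + kept middle part + recv_right into the new array
}
```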
- * @param ghost_l2l - * @param[in] cells_distrib at level + 1 - * @return the cell distribution at level - */ + // std::cout << cpp_tools::colors::green << " --> End distrib::fit_particles_in_distrib " + // << cpp_tools::colors::reset << std::endl; + } + /// + /// \brief Build the cell distribution at one level upper + /// \param[in] para the parallel manager + /// \param[in] dimension of the problem + /// \param[inout] mortonCellIndex the index cell at level+1 (in) and we + /// construct the parent cells (out) + /// \param[in] level current level to construct the cell distribution + /// \param[in] cellDistrib at level + 1 + /// \return the cell distribution at level + /// template<typename VectorMortonIdx, typename MortonDistribution> inline auto build_upper_distribution(cpp_tools::parallel_manager::parallel_manager& para, const std::size_t dimension, const int& level, @@ -725,9 +889,14 @@ namespace scalfmm::parallel::utils const MortonDistribution& cells_distrib) -> MortonDistribution { using morton_type = typename VectorMortonIdx::value_type; - + // std::cout << cpp_tools::colors::blue << " --> Begin distrib::build_upper_distribution at level " << level + // << cpp_tools::colors::reset << std::endl; + // std::cout << std::endl; MortonDistribution parent_distrib(cells_distrib); auto rank = para.get_process_id(); + // std::int64_t ghost_parent{-1}; + // io::print("rank(" + std::to_string(rank) + ") cells_distrib: ", cells_distrib); + // io::print("rank(" + std::to_string(rank) + ") mortonCellIndex: ", mortonCellIndex); // get the parent distribution for(auto& p: parent_distrib) @@ -828,6 +997,7 @@ namespace scalfmm::parallel::utils mortonCellIndex.erase(last, mortonCellIndex.end()); } + // io::print("rank(" + std::to_string(rank) + ") mortonCellIndex1: ", mortonCellIndex); parent_distrib[0][0] = parent_distrib[rank][0]; parent_distrib[0][1] = parent_distrib[rank][1]; auto mpi_type = cpp_tools::parallel_manager::mpi::get_datatype<morton_type>(); @@ -835,47 +1005,66 @@ namespace scalfmm::parallel::utils /// share the distribution on all processors para.get_communicator().allgather(parent_distrib.data(), 2, mpi_type); + // print_distrib("parent_distrib(allgather):", rank, parent_distrib); + + // std::cout << cpp_tools::colors::blue << " --> End distrib::build_upper_distribution at level " << level + // << cpp_tools::colors::reset << std::endl; return parent_distrib; } - /** - * @brief Merges two sorted vectors. - * - * Elements appear only once. - * - * @tparam VectorMortonIdx - * @param v1 first vector to merge. - * @param v2 vector to merge. - * @return the merged vector to the first vector. 
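`build_upper_distribution` derives the level `l` distribution from the level `l+1` one by replacing every Morton index with its parent. For a 2^dimension tree this amounts to a right shift by `dimension` bits followed by duplicate removal; a tiny self-contained illustration (assumed Morton encoding, arbitrary values):

```cpp
// Illustration only: the parent of a child Morton index is the child index with its
// `dimension` low-order bits dropped, consistent with nb_children = 2^dimension used
// elsewhere in this diff.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

int main()
{
    constexpr std::size_t dimension = 3;   // octree: 8 children per cell
    std::vector<std::int64_t> children{16, 17, 20, 23, 24, 31};   // sorted child indices

    std::vector<std::int64_t> parents(children.size());
    std::transform(children.begin(), children.end(), parents.begin(),
                   [](std::int64_t m) { return m >> dimension; });
    parents.erase(std::unique(parents.begin(), parents.end()), parents.end());

    for(auto p: parents) { std::cout << p << ' '; }   // prints: 2 3
    std::cout << '\n';
}
```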
- */ + /// + /// \brief merge two sorted vectors + /// + /// Elements appear only once + /// + /// \param[in] v1 first vector to merge + /// \param[in] v2 vector to merge + /// + /// \return the merged vector + /// to the first vector + /// template<typename VectorMortonIdx> - inline auto merge_unique(VectorMortonIdx& v1, const VectorMortonIdx& v2) -> VectorMortonIdx + inline VectorMortonIdx merge_unique(VectorMortonIdx& v1, const VectorMortonIdx& v2) { + /* std::cout << cpp_tools::colors::green << " --> Begin let::merge_unique " << + cpp_tools::colors::reset + << std::endl*/ VectorMortonIdx dst; std::merge(v1.begin(), v1.end(), v2.begin(), v2.end(), std::back_inserter(dst)); auto last = std::unique(dst.begin(), dst.end()); + // std::cout << " last " << *last <<std::endl; dst.erase(last, dst.end()); - + // std::cout << cpp_tools::colors::green << " --> End let::merge_unique " << + // cpp_tools::colors::reset + // << std::endl; + // io::print(" merge uniq dst", dst); return dst; } - /** - * @brief - * - * @tparam VectorMortonIdx - * @tparam Iterator - * @param a_beg - * @param a_end - * @param b_beg - * @param b_end - * @return VectorMortonIdx - */ template<typename VectorMortonIdx, typename Iterator> - auto merge_unique_fast(const Iterator a_beg, const Iterator a_end, const Iterator b_beg, - const Iterator b_end) -> VectorMortonIdx + VectorMortonIdx merge_unique_fast(const Iterator a_beg, const Iterator a_end, const Iterator b_beg, + const Iterator b_end) { - int j{0}; + // io::print(std::cout, " merge uniq v1", a_beg, a_end); + // std::cout << std::endl; + // io::print(std::cout, " merge uniq v2", b_beg, b_end); + // std::cout << std::endl; int n = std::distance(b_beg, b_end); + int m = std::distance(a_beg, a_end); + + if(m == 0) + { + VectorMortonIdx merged(n); + std::copy(b_beg, b_end, merged.begin()); + return merged; + } + if(n == 0) + { + VectorMortonIdx merged(m); + std::copy(a_beg, a_end, merged.begin()); + return merged; + } + int j{0}; std::vector<int> add(n); int nb_elt_to_add{0}; Iterator it_a{a_beg}, it_b{b_beg}; @@ -900,9 +1089,9 @@ namespace scalfmm::parallel::utils ++it_a; } } - n = std::distance(a_beg, a_end); + // n = std::distance(a_beg, a_end); - VectorMortonIdx merged(n + nb_elt_to_add, -1); + VectorMortonIdx merged(m + nb_elt_to_add, -1); it_a = a_beg; it_b = b_beg; @@ -931,22 +1120,19 @@ namespace scalfmm::parallel::utils } } std::copy(it_a, a_end, it); - // io::print("merged ", merged); + // io::print("merged ", merged); return merged; } - /** - * @brief find if the index exists owning the index - * - * @tparam MortonIdx - * @tparam VectorMortonIdx - * @param index - * @param my_index - * @param[in] start [optional] position to start in the distribution vector - * @return the process number (if -1 index not in my distribution) - */ + /// + /// \brief find if the index exists owning the index + /// \param[in] index + /// \param[in] distrib the index distribution + /// \param[in] start [optional] position to start in the distribution + /// vector \return the process number (if -1 index not in my distribution) + /// template<typename MortonIdx, typename VectorMortonIdx> - inline auto find_index(const MortonIdx& index, const VectorMortonIdx& my_index, std::size_t& start) -> std::int64_t + inline std::int64_t find_index(const MortonIdx& index, const VectorMortonIdx& my_index, std::size_t& start) { for(std::size_t i = start; i < my_index.size(); ++i) { @@ -968,19 +1154,8 @@ namespace scalfmm::parallel::utils } return -1; } - - /** - * @brief - * - * @tparam MortonIdx - * 
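As a usage note, `merge_unique` above is the plain `std::merge` + `std::unique` idiom over two sorted Morton-index vectors (`merge_unique_fast` appears to compute the same result while pre-allocating the output instead of growing it through `std::back_inserter`). A short stand-alone example with arbitrary values:

```cpp
// Merge two sorted Morton-index vectors and drop duplicates, mirroring merge_unique.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <iterator>
#include <vector>

int main()
{
    std::vector<std::int64_t> v1{1, 3, 5, 8};
    std::vector<std::int64_t> v2{2, 3, 8, 9};

    std::vector<std::int64_t> dst;
    std::merge(v1.begin(), v1.end(), v2.begin(), v2.end(), std::back_inserter(dst));
    dst.erase(std::unique(dst.begin(), dst.end()), dst.end());

    for(auto m: dst) { std::cout << m << ' '; }   // prints: 1 2 3 5 8 9
    std::cout << '\n';
}
```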
@tparam LeafInfo - * @param index - * @param my_leaves - * @param start - * @return std::int64_t - */ template<typename MortonIdx, typename LeafInfo> - inline auto find_index2(const MortonIdx& index, const LeafInfo& my_leaves, std::size_t& start) -> std::int64_t + inline std::int64_t find_index2(const MortonIdx& index, const LeafInfo& my_leaves, std::size_t& start) { for(std::size_t i = start; i < my_leaves.size(); ++i) { @@ -1002,22 +1177,18 @@ namespace scalfmm::parallel::utils } return -1; } - - /** - * @brief check if the morton index used in the vector of indexes exist - * - * This step needs communication - * - * @tparam VectorMortonIdx - * @tparam MortonDistribution - * @param para the parallel manager - * @param needed_idx the index to check if they exits in the other processors - * @param distrib the index distribution on all processors - * @param local_morton_idx My local morton index - */ + /// + /// \brief check if the morton index used in the vector of indexes exist + /// + /// This step needs communication + /// \param para the parallel manager + /// \param needed_idx the index to check if they exits in the other processors + /// \param distrib the index distribution on all processors + /// \param local_morton_idx My local morton index + /// template<typename VectorMortonIdx, typename MortonDistribution> - auto check_if_morton_index_exist(cpp_tools::parallel_manager::parallel_manager& para, VectorMortonIdx& needed_idx, - const MortonDistribution& distrib, const VectorMortonIdx& local_morton_idx) -> void + void check_if_morton_index_exist(cpp_tools::parallel_manager::parallel_manager& para, VectorMortonIdx& needed_idx, + const MortonDistribution& distrib, const VectorMortonIdx& local_morton_idx) { auto rank = para.get_process_id(); auto nb_proc = para.get_num_processes(); @@ -1184,30 +1355,23 @@ namespace scalfmm::parallel::utils // We remove the bad_index in order to have only the existing components (leaf/cell) std::sort(needed_idx.begin(), needed_idx.end()); auto last = std::unique(needed_idx.begin(), needed_idx.end()); - + // io::print("rank(" + std::to_string(rank) + ") uniq needed_idx : ", needed_idx); if(*(last - 1) == bad_index) { last = last - 1; } - needed_idx.erase(last, needed_idx.end()); + // // io::print("rank(" + std::to_string(rank) + ") needed_idx : ", needed_idx); + // std::cout << cpp_tools::colors::green << " (" << rank << ") --> End distrib::check_if_morton_index_exist + // " + // << cpp_tools::colors::reset << std::endl + // << std::flush; } - /** - * @brief - * - * @tparam VectorMortonIdx - * @tparam leafInfo - * @tparam MortonDistribution - * @param para - * @param needed_idx - * @param distrib - * @param leaf_info - */ template<typename VectorMortonIdx, typename leafInfo, typename MortonDistribution> - auto check_if_leaf_morton_index_exist(cpp_tools::parallel_manager::parallel_manager& para, + void check_if_leaf_morton_index_exist(cpp_tools::parallel_manager::parallel_manager& para, VectorMortonIdx& needed_idx, const MortonDistribution& distrib, - const leafInfo& leaf_info) -> void + const leafInfo& leaf_info) { auto rank = para.get_process_id(); auto nb_proc = para.get_num_processes(); @@ -1381,21 +1545,24 @@ namespace scalfmm::parallel::utils } #endif + + // io::print("rank(" + std::to_string(rank) + ") needed_idx : ", needed_idx); + // std::cout << cpp_tools::colors::green << " (" << rank + // << ") --> End distrib::check_if_leaf_morton_index_exist " << cpp_tools::colors::reset << + // std::endl + // << std::flush; } - /** - * @brief find the group 
owning the index - * - * @tparam MortonIdx - * @tparam GroupIteratorType - * @param[in] begin iterator to start search - * @param[in] end iterator to complete the search vector - * @param[in] index the index - * @return GroupIteratorType - */ - template<typename MortonIdx, typename GroupIteratorType> - inline auto find_group_for_index(GroupIteratorType begin, GroupIteratorType end, - const MortonIdx& index) -> GroupIteratorType + /// + /// \brief find the group owning the index + /// + /// \param[in] index the index + /// \param[in] begin iterator to start search + /// \param[in] end iterator to complete the search + /// vector \return the process number + /// + template<typename MortonIdx, typename Group_iterator_t> + inline Group_iterator_t find_group_for_index(Group_iterator_t begin, Group_iterator_t end, const MortonIdx& index) { for(auto grp_ptr = begin; grp_ptr != end; ++grp_ptr) { @@ -1408,22 +1575,20 @@ namespace scalfmm::parallel::utils } return end; } - - /** - * @brief - * - * @tparam GroupIteratorType - * @tparam MortonIdxVectorType - * @tparam Dependencies_t - * @param begin - * @param end - * @param morton_to_send - * @param deps - */ - template<typename GroupIteratorType, typename MortonIdxVectorType, typename Dependencies_t> - inline auto build_dependencies_from_morton_vector(GroupIteratorType begin, GroupIteratorType end, - const MortonIdxVectorType& morton_to_send, - Dependencies_t& deps) -> void + /// @brief Build the vector of dependencies + /// + /// Build the vector of dependencies by adding the first address of the multipoles inside all groups + /// between begin and end + /// @tparam Group_iterator_t + /// @tparam MortonIdxVector_t + /// @tparam Dependencies_t + /// @param begin begin iterator on group + /// @param end end iterator on group + /// @param morton_to_send the Morton index of the cell to send + /// @param deps the vector of dependencies + template<typename Group_iterator_t, typename MortonIdxVector_t, typename Dependencies_t> + void build_multipoles_dependencies_from_morton_vector(Group_iterator_t begin, Group_iterator_t end, + const MortonIdxVector_t& morton_to_send, Dependencies_t& deps) { const int max_idx = morton_to_send.size(); // loop on the groups // Find the group containing the first index @@ -1448,20 +1613,9 @@ namespace scalfmm::parallel::utils } } } - - /** - * @brief - * - * @tparam GroupIteratorType - * @tparam MortonIdxVectorType - * @param first - * @param second - * @param mortons - * @return auto - */ - template<typename GroupIteratorType, typename MortonIdxVectorType> - auto serialise(std::pair<GroupIteratorType, GroupIteratorType> first, - std::pair<GroupIteratorType, GroupIteratorType> second, const MortonIdxVectorType& mortons) + template<typename Group_iterator_t, typename MortonIdxVector_t> + auto serialise(std::pair<Group_iterator_t, Group_iterator_t> first, + std::pair<Group_iterator_t, Group_iterator_t> second, const MortonIdxVector_t& mortons) { } diff --git a/include/scalfmm/tools/fma_dist_loader.hpp b/include/scalfmm/tools/fma_dist_loader.hpp index 7fadf8c4a6679f259b654ae49720e16938005932..28be4b05dd7f37dbdbd973116e6092044c411f9c 100644 --- a/include/scalfmm/tools/fma_dist_loader.hpp +++ b/include/scalfmm/tools/fma_dist_loader.hpp @@ -34,35 +34,14 @@ namespace scalfmm::io using FFmaGenericLoader<FReal, Dimension>::m_verbose; using MPI_Offset = std::size_t; - /** - * @brief Number of particles that the calling process will manage. 
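`find_group_for_index` above walks the group range and returns the first group whose Morton interval contains the requested index, or `end` when no group owns it. A simplified stand-alone sketch with a hypothetical `group` record reduced to a half-open interval (the real scalfmm group API is richer than this):

```cpp
// Sketch of the interval scan behind find_group_for_index; `group` is a stand-in type.
#include <cstdint>
#include <iostream>
#include <vector>

struct group
{
    std::int64_t starting_index;   // first Morton index stored in the group
    std::int64_t ending_index;     // one past the last Morton index stored in the group
};

template<typename MortonIdx, typename GroupIterator>
GroupIterator find_group_for_index(GroupIterator begin, GroupIterator end, MortonIdx const& index)
{
    for(auto it = begin; it != end; ++it)
    {
        if(index >= it->starting_index && index < it->ending_index)
        {
            return it;   // the group owning `index`
        }
    }
    return end;   // `index` is not stored in [begin, end)
}

int main()
{
    std::vector<group> groups{{0, 8}, {8, 21}, {21, 64}};   // sorted, non-overlapping
    auto owner = find_group_for_index(groups.begin(), groups.end(), std::int64_t{10});
    std::cout << "index 10 lives in group [" << owner->starting_index << ", " << owner->ending_index << ")\n";
}
```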
- * - */ - std::size_t m_local_number_of_particles; + std::size_t m_local_number_of_particles; ///< Number of particles that the calling process will manage. - /** - * @brief - * - */ - MPI_Offset m_idxParticles; + std::size_t m_start; ///< - /** - * @brief number of my first parts in file. - * - */ - std::size_t m_start; + size_t m_headerSize; ///< size of the header in byte - /** - * @brief - * - */ - size_t m_headerSize; - - /** - * @brief - * - */ - const cpp_tools::parallel_manager::parallel_manager* m_parallelManager; + const cpp_tools::parallel_manager::parallel_manager* m_parallelManager; ///< a pointer on the parallel manager + // MPI_Offset m_idxParticles; ///< public: /** @@ -76,7 +55,7 @@ namespace scalfmm::io const bool verbose = false) : FFmaGenericLoader<FReal, Dimension>(inFilename, verbose) , m_local_number_of_particles(0) - , m_idxParticles(0) + // , m_idxParticles(0) , m_headerSize(0) , m_parallelManager(¶) { @@ -176,35 +155,15 @@ namespace scalfmm::io class DistFmaGenericWriter : public FFmaGenericWriter<FReal> { protected: - /** - * @brief - * - */ - const cpp_tools::parallel_manager::parallel_manager* m_parallelManager; + const cpp_tools::parallel_manager::parallel_manager* m_parallelManager; ///< - /** - * @brief - * - */ - bool _writeDone; + // bool _writeDone; ///< - /** - * @brief - * - */ - int m_headerSize; + int m_headerSize; ///< size of the header in byte - /** - * @brief number of data to write for one particle. - * - */ - int _nbDataTowritePerRecord; + int m_nbDataTowritePerRecord; ///< number of data to write for one particle. - /** - * @brief number of particle (global) to write in the file. - * - */ - std::size_t _numberOfParticles; + std::size_t m_numberOfParticles; ///< number of particle (global) to write in the file. using FFmaGenericWriter<FReal>::m_file; #ifdef SCALFMM_USE_MPI @@ -215,6 +174,8 @@ namespace scalfmm::io MPI_File _mpiFile; #endif public: + using base_type = FFmaGenericWriter<FReal>; + using value_type = FReal; /** * @brief Construct a new Dist Fma Generic Writer object * @@ -226,18 +187,22 @@ namespace scalfmm::io * @param inFilename the name of the file to open. * @param para */ - DistFmaGenericWriter(const std::string inFilename, const cpp_tools::parallel_manager::parallel_manager& para) - : FFmaGenericWriter<FReal>(inFilename) + DistFmaGenericWriter(const std::string inFilename, cpp_tools::parallel_manager::parallel_manager const& para, + const bool verbose = false) + : FFmaGenericWriter<FReal>(inFilename, verbose) , m_parallelManager(¶) - , _writeDone(false) + // , _writeDone(false) , m_headerSize(0) - , _nbDataTowritePerRecord(8) - , _numberOfParticles(0) + , m_nbDataTowritePerRecord(8) + , m_numberOfParticles(0) { #ifdef SCALFMM_USE_MPI if(!this->isBinary()) { - std::cout << "DistFmaGenericWriter only works with binary file (.bfma)." << std::endl; + if(para.io_master()) + { + std::cout << "DistFmaGenericWriter only works with binary file (.bfma)." << std::endl; + } std::exit(EXIT_FAILURE); } auto comm = m_parallelManager->get_communicator(); @@ -247,7 +212,7 @@ namespace scalfmm::io // Is it open? if(fileIsOpen != MPI_SUCCESS) { - std::cerr << "Cannot create parallel file, DistFmaGenericWriter constructeur abort." << std::endl; + std::cerr << "Cannot create parallel file, DistFmaGenericWriter constructor abort." 
<< std::endl; std::exit(EXIT_FAILURE); return; } @@ -281,19 +246,20 @@ namespace scalfmm::io std::array<unsigned int, 4> typeFReal = {dataType, nbDataPerRecord, dimension, nb_input_values}; FReal x = boxWidth * FReal(0.5); m_headerSize = 0; - _nbDataTowritePerRecord = nbDataPerRecord; - _numberOfParticles = nbParticles; + m_nbDataTowritePerRecord = nbDataPerRecord; + m_numberOfParticles = nbParticles; if(m_parallelManager->master()) { - FFmaGenericWriter<FReal>::writerBinaryHeader(centerOfBox, boxWidth, nbParticles, typeFReal.data(), 4); - std::cout << "centerOfBox " << centerOfBox << " boxWidth " << boxWidth << " nbParticles " << nbParticles - << " dataType " << dataType << " nbDataPerRecord " << nbDataPerRecord << " dimension " - << dimension << " nb_input_values " << nb_input_values << std::endl; + auto half_Box_width = boxWidth * value_type(0.5); + base_type::writerBinaryHeader(centerOfBox, half_Box_width, nbParticles, typeFReal.data(), 4); + // std::cout << "centerOfBox " << centerOfBox << " half_Box_width " << half_Box_width << " nbParticles " + // << nbParticles << " dataType " << dataType << " nbDataPerRecord " << nbDataPerRecord + // << " dimension " << dimension << " nb_input_values " << nb_input_values << std::endl; #ifdef SCALFMM_USE_MPI - for(auto a: typeFReal) - { - std::cout << "typeFReal " << a << std::endl; - } + // for(auto a: typeFReal) + // { + // std::cout << "typeFReal " << a << std::endl; + // } int sizeType = 0; int ierr = 0; auto mpiInt64 = cpp_tools::parallel_manager::mpi::get_datatype<std::size_t>(); @@ -324,8 +290,8 @@ namespace scalfmm::io MPI_Type_size(mpiReal, &sizeType); m_headerSize += sizeType * (1 + PointType::dimension); // Build the header offset - std::cout << " headerSize " << m_headerSize << std::endl; - FFmaGenericWriter<FReal>::close(); + // std::cout << " headerSize " << m_headerSize << std::endl; + FFmaGenericWriter<FReal>::close(); // Why ????? #endif } #ifdef SCALFMM_USE_MPI @@ -333,7 +299,7 @@ namespace scalfmm::io comm.bcast(&m_headerSize, 1, MPI_INT, 0); // MPI_Bcast(&_headerSize, 1, MPI_INT, 0, m_parallelManager->global().getComm()); - std::cout << " _headerSize " << m_headerSize << std::endl; + // std::cout << " _headerSize " << m_headerSize << std::endl; #endif // MPI_File_close(&_mpiFile); @@ -349,111 +315,94 @@ namespace scalfmm::io * @brief Write all for all particles the position, physical values, potential and forces. * * @tparam TreeType - * @param myOctree the octree + * @param tree the octree * @param nbParticles number of particles. 
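`DistFmaGenericWriter` opens the `.bfma` file collectively, lets the master rank write the binary header, and broadcasts the header size so that every rank can later compute its own write offset. A bare-bones plain-MPI sketch of that open/header/broadcast pattern (file name and header words are placeholders, not the actual FMA layout):

```cpp
// Collective file creation + master-only header write + broadcast of the header size.
#include <mpi.h>

#include <cstdio>

int main(int argc, char** argv)
{
    MPI_Init(&argc, &argv);
    int rank{0};
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);

    // every rank takes part in the collective open (create + write only)
    MPI_File fh;
    MPI_File_open(MPI_COMM_WORLD, "example.bfma", MPI_MODE_CREATE | MPI_MODE_WRONLY, MPI_INFO_NULL, &fh);

    int header_size{0};
    if(rank == 0)
    {
        unsigned int header[4] = {8u, 8u, 3u, 1u};   // placeholder header words
        MPI_File_write_at(fh, 0, header, 4, MPI_UNSIGNED, MPI_STATUS_IGNORE);
        header_size = static_cast<int>(4 * sizeof(unsigned int));
    }
    // make the header size known everywhere, as the writer does before computing offsets
    MPI_Bcast(&header_size, 1, MPI_INT, 0, MPI_COMM_WORLD);
    std::printf("rank %d sees header_size = %d\n", rank, header_size);

    MPI_File_close(&fh);
    MPI_Finalize();
    return 0;
}
```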
*/ template<typename TreeType> - auto writeFromTree(const TreeType& myOctree, const std::size_t& nbParticles) -> void + auto writeFromTree(const TreeType& tree, std::size_t const& nbParticles) -> void { - // // - // // Write the header - // int sizeType = 0, ierr = 0; - // FReal tt = 0.0; - // MPI_Datatype mpistd::size_t_t = m_parallelManager->GetType(nbParticles); - // MPI_Datatype mpiFReal_t = m_parallelManager->GetType(tt); - // MPI_Type_size(mpiFReal_t, &sizeType); - // int myRank = m_parallelManager->global().processId(); - // _headerSize = 0; - // // - // unsigned int typeFReal[2] = {sizeof(FReal), static_cast<unsigned - // int>(_nbDataTowritePerRecord)}; if(myRank == 0) - // { - // ierr = MPI_File_write_at(_mpiFile, 0, &typeFReal, 2, MPI_INT, MPI_STATUS_IGNORE); - // } - // MPI_Type_size(MPI_INT, &sizeType); - // _headerSize += sizeType * 2; - // if(myRank == 0) - // { - // ierr = MPI_File_write_at(_mpiFile, _headerSize, &nbParticles, 1, mpistd::size_t_t, - // MPI_STATUS_IGNORE); - // } - // MPI_Type_size(mpistd::size_t_t, &sizeType); - // _headerSize += sizeType * 1; - // auto centerOfBox = myOctree.getBoxCenter(); - // FReal boxSim[4] = {myOctree.getBoxWidth() * 0.5, centerOfBox.getX(), centerOfBox.getX(), - // centerOfBox.getX()}; - - // if(myRank == 0) - // { - // ierr = MPI_File_write_at(_mpiFile, _headerSize, &boxSim[0], 4, mpiFReal_t, - // MPI_STATUS_IGNORE); - // } - // if(ierr > 0) - // { - // std::cerr << "Error during the construction of the header in " - // "FMpiFmaGenericWriter::writeDistributionOfParticlesFromOctree" - // << std::endl; - // } - // MPI_Type_size(mpiFReal_t, &sizeType); - // _headerSize += sizeType * 4; - // // - // // Construct the local number of particles on my process - // std::size_t nbLocalParticles = 0, maxPartLeaf = 0; - // MortonIndex starIndex = mortonLeafDistribution[2 * myRank], - // endIndex = mortonLeafDistribution[2 * myRank + 1]; - // myOctree.template forEachCellLeaf<typename TreeType::LeafClass_T>( - // [&](typename TreeType::GroupSymbolCellClass_T* gsymb, - // typename TreeType::GroupCellUpClass_T* /* gmul */, - // typename TreeType::GroupCellDownClass_T* /* gloc */, - // typename TreeType::LeafClass_T* leafTarget) { - // if(!(gsymb->getMortonIndex() < starIndex || gsymb->getMortonIndex() > endIndex)) - // { - // auto n = leafTarget->getNbParticles(); - // nbLocalParticles += n; - // maxPartLeaf = std::max(maxPartLeaf, n); - // } - // }); - // std::vector<FReal> particles(maxPartLeaf * _nbDataTowritePerRecord); - // // Build the offset for eaxh processes - // std::size_t before = 0; // Number of particles before me (rank < myrank) - // MPI_Scan(&nbLocalParticles, &before, 1, mpistd::size_t_t, MPI_SUM, - // m_parallelManager->global().getComm()); before -= nbLocalParticles; MPI_Offset offset = - // _headerSize + sizeType * _nbDataTowritePerRecord * before; - // // - // // Write particles in file - // myOctree.template forEachCellLeaf<typename TreeType::LeafClass_T>( - // [&](typename TreeType::GroupSymbolCellClass_T* gsymb, - // typename TreeType::GroupCellUpClass_T* /* gmul */, - // typename TreeType::GroupCellDownClass_T* /* gloc */, - // typename TreeType::LeafClass_T* leafTarget) { - // if(!(gsymb->getMortonIndex() < starIndex || gsymb->getMortonIndex() > endIndex)) - // { - // const std::size_t nbPartsInLeaf = leafTarget->getNbParticles(); - // const FReal* const posX = leafTarget->getPositions()[0]; - // const FReal* const posY = leafTarget->getPositions()[1]; - // const FReal* const posZ = leafTarget->getPositions()[2]; - // 
const FReal* const physicalValues = leafTarget->getPhysicalValues(); - // const FReal* const forceX = leafTarget->getForcesX(); - // const FReal* const forceY = leafTarget->getForcesY(); - // const FReal* const forceZ = leafTarget->getForcesZ(); - // const FReal* const potential = leafTarget->getPotentials(); - // for(int i = 0, k = 0; i < nbPartsInLeaf; ++i, k += _nbDataTowritePerRecord) - // { - // particles[k] = posX[i]; - // particles[k + 1] = posY[i]; - // particles[k + 2] = posZ[i]; - // particles[k + 3] = physicalValues[i]; - // particles[k + 4] = potential[i]; - // particles[k + 5] = forceX[i]; - // particles[k + 6] = forceY[i]; - // particles[k + 7] = forceZ[i]; - // } - // MPI_File_write_at(_mpiFile, offset, particles.data(), - // static_cast<int>(_nbDataTowritePerRecord * nbPartsInLeaf), - // mpiFReal_t, MPI_STATUS_IGNORE); - // offset += sizeType * _nbDataTowritePerRecord * nbPartsInLeaf; - // } - // }); + // The header is already written + static constexpr int dimension = TreeType::dimension; + using int64 = int; + + int maxPartLeaf{0}, nbLocalParticles{0}; + scalfmm::component::for_each_mine_leaf(tree.begin_mine_leaves(), tree.end_mine_leaves(), + [&nbLocalParticles, &maxPartLeaf](auto& leaf) + { + auto n = static_cast<int>(leaf.size()); + nbLocalParticles += n; + maxPartLeaf = std::max(maxPartLeaf, n); + ; + }); + // Build the offset for each processes + int64 before{0}; // Number of particles before me (rank < myrank) + int sizeRealType{0}, ierr{0}; + + auto mpiReal = cpp_tools::parallel_manager::mpi::get_datatype<FReal>(); + MPI_Type_size(mpiReal, &sizeRealType); + MPI_Datatype mpiInt64 = cpp_tools::parallel_manager::mpi::get_datatype<int64>(); + + // #ifdef SCALFMM_USE_MPI + auto comm = m_parallelManager->get_communicator(); + MPI_Scan(&nbLocalParticles, &before, 1, mpiInt64, MPI_SUM, comm); + before -= nbLocalParticles; + // std::cout << " nbLocalParticles " << nbLocalParticles << " maxPartLeaf " << maxPartLeaf << " before " + // << before << std::endl + // << std::flush; + // + MPI_Offset offset = m_headerSize + sizeRealType * m_nbDataTowritePerRecord * before; + // std::cout << " offset to write part " << offset << std::endl; + // + // Write particles in file + + using value_type = typename TreeType::leaf_type::value_type; + static constexpr int nb_elt_per_par = + dimension + TreeType::particle_type::inputs_size + TreeType::particle_type::outputs_size; + // std::cout << "nb_elt_per_par " << nb_elt_per_par << std::endl; + using particles_t = std::array<value_type, nb_elt_per_par>; + std::vector<particles_t> particles(nbLocalParticles); + // std::vector<FReal> particles(maxPartLeaf * m_nbDataTowritePerRecord); + + // + scalfmm::component::for_each_mine_leaf( + tree.begin_mine_leaves(), tree.end_mine_leaves(), + [this, &offset, &particles, &sizeRealType, &mpiReal](auto& leaf) + { + int pos = 0; + auto nbPartsInLeaf = leaf.size(); + // std::cout << " leaf index " << leaf.index() << " nbpart " << leaf.size() << std::endl; + for(auto const& it_p: leaf) + { + auto& particles_elem = particles[pos++]; + const auto& p = typename TreeType::leaf_type::particle_type(it_p); + // + int i = 0; + const auto points = p.position(); + for(int k = 0; k < dimension; ++k, ++i) + { + particles_elem[i] = points[k]; + } + // get inputs + for(int k = 0; k < TreeType::particle_type::inputs_size; ++k, ++i) + { + particles_elem[i] = p.inputs(k); + } + // get outputs + for(int k = 0; k < TreeType::particle_type::outputs_size; ++k, ++i) + { + particles_elem[i] = p.outputs(k); + } + // std::cout << " 
" << pos << " part " << particles_elem << std::endl; + } + // std::cout << " write to ptr_data " << particles.data() << " size " + // << static_cast<int>(m_nbDataTowritePerRecord * nbPartsInLeaf) << std::endl; + MPI_File_write_at(_mpiFile, offset, particles.data(), + static_cast<int>(m_nbDataTowritePerRecord * nbPartsInLeaf), mpiReal, + MPI_STATUS_IGNORE); + offset += sizeRealType * m_nbDataTowritePerRecord * nbPartsInLeaf; + // std::cout << " next offset to write part " << offset << std::endl; + // std::cout << std::endl << std::flush; + }); #ifdef SCALFMM_USE_MPI MPI_File_close(&_mpiFile); diff --git a/include/scalfmm/tools/fma_loader.hpp b/include/scalfmm/tools/fma_loader.hpp index 5c99f314c83c07fb10baf47599990cc6d77fe212..0cd0ba123b0945836ced7868f889b00ec024266b 100644 --- a/include/scalfmm/tools/fma_loader.hpp +++ b/include/scalfmm/tools/fma_loader.hpp @@ -76,60 +76,16 @@ namespace scalfmm::io class FFmaGenericLoader { protected: - /** - * @brief the stream used to read the file. - * - */ - std::fstream* m_file; - - /** - * @brief if true the file to read is in binary mode. - * - */ - bool m_binaryFile; - - /** - * @brief the center of box (read from file). - * - */ - container::point<FReal, Dimension> m_centerOfBox; - - /** - * @brief the center of box (read from file). - * - */ - std::vector<FReal> m_center{}; - - /** - * @brief the box width (read from file). - * - */ - FReal m_boxWidth; - - /** - * @brief the number of particles (read from file) - * - */ - std::size_t m_nbParticles; - - /** - * @brief Size of the data to read, number of data on 1 line, - * dimension of space and number of input values - * - */ - std::array<unsigned int, 4> m_typeData; - - /** - * @brief file name containung the data. - * - */ - std::string m_filename; - - /** - * @brief Verbose mode. - * - */ - bool m_verbose; + std::fstream* m_file; ///< the stream used to read the file + bool m_binaryFile; ///< if true the file to read is in binary mode + container::point<FReal, Dimension> m_centerOfBox; ///< The center of box (read from file) + std::vector<FReal> m_center{}; ///< The center of box (read from file) + FReal m_boxWidth; ///< the box width (read from file) + std::size_t m_nbParticles; ///< the number of particles (read from file) + std::array<unsigned int, 4> m_typeData; ///< Size of the data to read, number of data on 1 line, dimension + ///< of space and number of input values + std::string m_filename; ///< file name containung the data + bool m_verbose; ///< Verbose mode private: /** @@ -250,25 +206,31 @@ namespace scalfmm::io /** * @brief To know if the file is open and ready to read. - * - * @return true - * @return false + * @return true if loader can work */ - inline auto isOpen() const -> bool { return this->m_file->is_open() && !this->m_file->eof(); } + bool isOpen() const { return this->m_file->is_open() && !this->m_file->eof(); } /** * @brief To get the number of particles from this loader - * - * @return std::size_t */ - inline auto getNumberOfParticles() const -> std::size_t { return this->getParticleCount(); } + std::size_t getNumberOfParticles() const { return this->getParticleCount(); } + /** + * @brief To get the number of particles from this loader + */ + std::size_t getMyNumberOfParticles() const { return this->getParticleCount(); } /** * @brief To get the center of the box from the simulation file opened by the loader. 
* + * @return box center (type Point) */ inline auto getCenterOfBox() const -> container::point<FReal, Dimension> { return this->getBoxCenter(); } - + /** + * @brief Get the center of the box contining the particles + * + * @return A point (ontainer::point<FReal>) representing the box center + */ + inline auto getBoxCenter() const { return this->m_centerOfBox; } /** * @brief Returns a pointer on the element of the Box center. * @@ -283,13 +245,6 @@ namespace scalfmm::io */ inline auto getParticleCount() const -> std::size_t { return this->m_nbParticles; } - /** - * @brief Get the center of the box contining the particles - * - * @return A point (ontainer::point<FReal>) representing the box center - */ - inline auto getBoxCenter() const { return this->m_centerOfBox; } - /** * @brief box width from the simulation file opened by the loader * @@ -620,10 +575,13 @@ namespace scalfmm::io * * @param filename the name of the file to open. */ - FFmaGenericWriter(const std::string& filename) + FFmaGenericWriter(std::string const& filename, const bool verbose = true) : m_binaryFile(false) { - std::cout << "FFmaGenericWriter filename " << filename << std::endl; + if(verbose) + { + std::cout << "FFmaGenericWriter filename " << filename << std::endl; + } std::string ext(".bfma"); // open particle file if(filename.find(".bfma") != std::string::npos) @@ -649,44 +607,40 @@ namespace scalfmm::io std::cerr << "File " << filename << " not opened! " << std::endl; std::exit(EXIT_FAILURE); } - std::cout << "FFmaGenericWriter file " << filename << " opened" << std::endl; - } - - /** - * @brief Construct a new FFmaGenericWriter object - * - * This constructor opens a file to be written to. - * - * @param filename the name of the file to open. - * @param binary true if the file to open is in binary mode - */ - FFmaGenericWriter(const std::string& filename, const bool binary) - : m_file(nullptr) - , m_binaryFile(binary) - { - if(binary) - { - this->m_file = new std::fstream(filename.c_str(), std::ifstream::out | std::ios::binary); - } - else - { - this->m_file = new std::fstream(filename.c_str(), std::ifstream::out); - this->m_file->precision(std::numeric_limits<FReal>::digits10); - } - // test if open - if(!this->m_file->is_open()) + if(verbose) { - std::cerr << "File " << filename << " not opened! " << std::endl; - std::exit(EXIT_FAILURE); + std::cout << "FFmaGenericWriter file " << filename << " opened" << std::endl; } - std::cout << "FFmaGenericWriter file " << filename << " opened" << std::endl; } - /** - * @brief - * - */ - inline auto close() -> void + // /** + // * This constructor opens a file to be written to. + // * + // * @param filename the name of the file to open. + // * @param binary true if the file to open is in binary mode + // */ + // FFmaGenericWriter(const std::string& filename, const bool binary) + // : m_file(nullptr) + // , m_binaryFile(binary) + // { + // if(binary) + // { + // this->m_file = new std::fstream(filename.c_str(), std::ifstream::out | std::ios::binary); + // } + // else + // { + // this->m_file = new std::fstream(filename.c_str(), std::ifstream::out); + // this->m_file->precision(std::numeric_limits<FReal>::digits10); + // } + // // test if open + // if(!this->m_file->is_open()) + // { + // std::cerr << "File " << filename << " not opened! 
" << std::endl; + // std::exit(EXIT_FAILURE); + // } + // std::cout << "FFmaGenericWriter file " << filename << " opened" << std::endl; + // } + void close() { m_file->close(); delete m_file; @@ -911,32 +865,33 @@ namespace scalfmm::io std::vector<particles_t> particles(number_particles); // int pos = 0; - scalfmm::component::for_each_leaf(std::cbegin(tree), std::cend(tree), - [&pos, &particles](auto& leaf) - { - for(auto const& it_p: leaf) - { - auto& particles_elem = particles[pos++]; - const auto& p = typename TreeType::leaf_type::particle_type(it_p); - // - int i = 0; - const auto points = p.position(); - for(int k = 0; k < dimension; ++k, ++i) - { - particles_elem[i] = points[k]; - } - // get inputs - for(int k = 0; k < nb_input_elements; ++k, ++i) - { - particles_elem[i] = p.inputs(k); - } - // get outputs - for(int k = 0; k < nb_output_elements; ++k, ++i) - { - particles_elem[i] = p.outputs(k); - } - } - }); + scalfmm::component::for_each_mine_leaf(tree.begin_mine_leaves(), tree.end_mine_leaves(), + [&pos, &particles](auto& leaf) + { + for(auto const& it_p: leaf) + { + auto& particles_elem = particles[pos++]; + const auto& p = + typename TreeType::leaf_type::particle_type(it_p); + // + int i = 0; + const auto points = p.position(); + for(int k = 0; k < dimension; ++k, ++i) + { + particles_elem[i] = points[k]; + } + // get inputs + for(int k = 0; k < nb_input_elements; ++k, ++i) + { + particles_elem[i] = p.inputs(k); + } + // get outputs + for(int k = 0; k < nb_output_elements; ++k, ++i) + { + particles_elem[i] = p.outputs(k); + } + } + }); // // write the particles const auto& centre = tree.box_center(); diff --git a/include/scalfmm/tree/dist_group_tree.hpp b/include/scalfmm/tree/dist_group_tree.hpp index e7d1b40af66b9c07e08a06b5bfc6693f77b92c86..fae3d95612a757ff0705013da707aa120f8b6930 100644 --- a/include/scalfmm/tree/dist_group_tree.hpp +++ b/include/scalfmm/tree/dist_group_tree.hpp @@ -6,13 +6,6 @@ #ifndef SCALFMM_TREE_DIST_GROUP_TREE_HPP #define SCALFMM_TREE_DIST_GROUP_TREE_HPP -#include "scalfmm/tree/box.hpp" -#include "scalfmm/tree/group_let.hpp" -#include "scalfmm/tree/group_tree_view.hpp" -#include "scalfmm/utils/io_helpers.hpp" - -#include <cpp_tools/colors/colorized.hpp> - #include <array> #include <fstream> #include <iostream> @@ -21,6 +14,16 @@ #include <utility> #include <vector> +#include "scalfmm/tree/box.hpp" +#include <scalfmm/tree/group_let.hpp> +#include <scalfmm/tree/group_tree_view.hpp> +#include <scalfmm/utils/io_helpers.hpp> + +#include <scalfmm/parallel/comm_access.hpp> + +#include <cpp_tools/colors/colorized.hpp> +#include <cpp_tools/parallel_manager/parallel_manager.hpp> + namespace scalfmm::component { /** @@ -40,9 +43,13 @@ namespace scalfmm::component using base_type = group_tree_view<Cell, Leaf, Box>; using leaf_iterator_type = typename base_type::leaf_iterator_type; using const_leaf_iterator_type = typename base_type::const_leaf_iterator_type; + using cell_iterator_type = typename base_type::cell_iterator_type; using cell_group_level_iterator_type = typename base_type::cell_group_level_type::iterator; using iterator_type = typename base_type::iterator_type; using const_iterator_type = typename base_type::const_iterator_type; + using grp_access_type = std::pair<cell_group_level_iterator_type, int>; + + static constexpr std::size_t dimension = base_type::box_type::dimension; /** * @brief Construct a new dist group tree object @@ -61,9 +68,29 @@ namespace scalfmm::component : base_type(tree_height, order, size_leaf_blocking, size_cell_blocking, box) , 
m_parallel_manager(parallel_manager) , m_level_shared{level_shared} + , transfer_acces(tree_height, parallel_manager.get_num_processes()) { m_cell_distrib.resize(tree_height); + up_down_acces.resize(tree_height); + + // transfer_acces.m_receive_multipoles_type.resize(tree_height); + // transfer_acces.m_send_multipoles_type.resize(tree_height); + // transfer_acces.m_send_morton.resize(tree_height); + // for(auto& vec: transfer_acces.m_send_morton) + // { + // vec.resize(parallel_manager.get_num_processes()); + // } + // m_receive_cells_access.resize(tree_height); + // for(auto& vec: transfer_acces.m_receive_cells_access) + // { + // vec.resize(parallel_manager.get_num_processes()); + // } + // m_send_cells_access.resize(tree_height); + // for(auto& vec: transfer_acces.m_send_cells_access) + // { + // vec.resize(parallel_manager.get_num_processes()); + // } } // template<typename ParticleContainer> @@ -267,9 +294,10 @@ namespace scalfmm::component VectorMortonIndexType const& ghosts_m2l, VectorMortonIndexType const& ghosts_m2m, const std::int64_t& ghost_l2l, data_distrib_value_type const& cell_distrib) -> void { - //io::print("create_cells_at_level mortonIdx", mortonIdx); - //io::print("ghosts_m2l", ghosts_m2l); - //io::print("ghosts_m2m", ghosts_m2m); + io::print(std::clog, "create_cells_at_level mortonIdx", mortonIdx); + io::print(std::clog, "ghosts_m2l", ghosts_m2l); + io::print(std::clog, "ghosts_m2m", ghosts_m2m); + std::clog << std::endl << std::flush; // construct group of cells at leaf level auto first_index = cell_distrib[0]; @@ -288,20 +316,23 @@ namespace scalfmm::component ghost_left_mortonIdx.back() = ghost_l2l; } - //io::print("create_from_leaf : ghost_left_mortonIdx ", ghost_left_mortonIdx); + // io::print("create_from_leaf : ghost_left_mortonIdx ", ghost_left_mortonIdx); this->build_groups_of_cells_at_level(ghost_left_mortonIdx, level, false); this->build_cells_in_groups_at_level(ghost_left_mortonIdx, base_type::m_box, level); - //io::print("ghost_left_mortonIdx ", ghost_left_mortonIdx); + // io::print("ghost_left_mortonIdx ", ghost_left_mortonIdx); + // std::cout << std::endl << std::flush; auto left_block_cells = std::move(base_type::m_group_of_cell_per_level.at(level)); + // std::cout << " merge_unique_fast " << std::endl << std::flush; auto ghost_right_mortonIdx = scalfmm::parallel::utils::merge_unique_fast<VectorMortonIndexType>( last, ghosts_m2l.end(), ghosts_m2m.begin(), ghosts_m2m.end()); - //io::print("create_from_leaf : ghost_right_mortonIdx ", ghost_right_mortonIdx); + // io::print("create_from_leaf : ghost_right_mortonIdx ", ghost_right_mortonIdx); + std::cout << std::endl << std::flush; + this->build_groups_of_cells_at_level(ghost_right_mortonIdx, level, false); this->build_cells_in_groups_at_level(ghost_right_mortonIdx, base_type::m_box, level); - auto right_block_cells = std::move(base_type::m_group_of_cell_per_level.at(level)); this->build_groups_of_cells_at_level(mortonIdx, level); this->build_cells_in_groups_at_level(mortonIdx, base_type::m_box, level); @@ -309,15 +340,17 @@ namespace scalfmm::component auto local_block_cells = std::move(base_type::m_group_of_cell_per_level.at(level)); auto all_cells_blocks = scalfmm::tree::let::merge_blocs(left_block_cells, local_block_cells, right_block_cells); - // std::cout << " All cells blocks at level " << level << " size: " << all_cells_blocks.size() << - // std::endl; int tt{0}; for(auto pg: all_cells_blocks) + // std::cout << " All cells blocks at level " << level << " size: " << all_cells_blocks.size() << 
std::endl + // << std::flush; + // int tt{0}; + // for(auto pg: all_cells_blocks) // { // std::cout << "block index " << tt++ << " "; // pg->print(); - // std::cout << std::endl; + // std::cout << std::endl << std::flush; // // pg->cstorage().print_block_data(std::cout); // } - // std::cout << std::endl; + // std::cout << std::endl << std::flush; base_type::m_group_of_cell_per_level.at(level) = std::move(all_cells_blocks); auto& grp_level = base_type::m_group_of_cell_per_level.at(level); int idx{0}; @@ -407,8 +440,10 @@ namespace scalfmm::component auto leaf_level = base_type::m_tree_height - 1; std::int64_t ghost_l2l_cell{-1}; + // std::cout << " this->create_cells_at_level \n" << std::flush; this->create_cells_at_level(leaf_level, mortonIdx, ghosts_m2l, ghosts_m2m, ghost_l2l_cell, cell_distrib); + // std::cout << " END create_from_leaf_level \n" << std::flush; // } @@ -600,12 +635,13 @@ namespace scalfmm::component template<typename ParticleContainer> auto fill_leaves_with_particles(ParticleContainer const& particle_container) -> void { +#ifndef TOTO // using scalfmm::details::tuple_helper; // using proxy_type = typename particle_type::proxy_type; // using const_proxy_type = typename particle_type::const_proxy_type; // using outputs_value_type = typename particle_type::outputs_value_type; auto begin_container = std::begin(particle_container); - std::size_t group_index{0}; + std::size_t group_index{0}, part_src_index{0}; // std::cout << cpp_tools::colors::red << " particle_container.size() " << particle_container.size() // << std::endl; // std::cout << " nb of mine grp " @@ -615,21 +651,21 @@ namespace scalfmm::component for(auto pg = base_type::cbegin_mine_leaves(); pg != base_type::cend_mine_leaves(); ++pg) { auto& group = *(pg->get()); - std::size_t part_src_index{0}; std::size_t leaf_index{0}; auto leaves_view = group.components(); // loop on leaves - for(auto const& leaf: group.components()) + for(auto const& leaf: leaves_view) { // get the leaf container auto leaf_container_begin = leaf.particles().first; - // std::cout << " nb part in leaf " << leaf.index() << " leaf.size() " << leaf.size() << std::endl + // std::cout << " leaf index " << leaf.index() << " leaf.size() " << leaf.size() << std::endl // << std::flush; // copy the particle in the leaf for(std::size_t index_part = 0; index_part < leaf.size(); ++index_part) { - // std::cout << " index_part " << index_part << " part_src_index " << part_src_index << std::endl + // std::cout << " --> index_part " << index_part << " part_src_index " << part_src_index + // << std::endl // << std::flush; // get the source index in the source container // auto source_index = std::get<1>(tuple_of_indexes.at(part_src_index)); @@ -639,7 +675,8 @@ namespace scalfmm::component // copy the particle // *leaf_container_begin = particle_container.particle(source_index).as_tuple(); - // std::cout << part_src_index << " p " << particle_container.at(part_src_index) << std::endl; + // std::cout << " --> " << part_src_index << " p " + // << particle_container.at(part_src_index) << std::endl; *leaf_container_begin = particle_container.at(part_src_index).as_tuple(); // proxy_type particle_in_leaf(*leaf_container_begin); @@ -657,16 +694,17 @@ namespace scalfmm::component ++group_index; // std::cout << " group " << group << std::endl; } - // #ifdef _DEBUG_BLOCK_DATA - // std::clog << " FINAL block\n"; - // int tt{0}; - // for(auto pg: m_group_of_leaf) - // { - // std::clog << "block index " << tt++ << std::endl; - // pg->cstorage().print_block_data(std::clog); - 
// } - // std::clog << " ---------------------------------------------------\n"; - // #endif +#ifdef _DEBUG_BLOCK_DATA + std::clog << " FINAL block\n"; + int tt{0}; + for(auto pg: m_group_of_leaf) + { + std::clog << "block index " << tt++ << std::endl; + pg->cstorage().print_block_data(std::clog); + } + std::clog << " ---------------------------------------------------\n"; +#endif +#endif } /** @@ -679,6 +717,56 @@ namespace scalfmm::component return m_parallel_manager; } + auto inline get_send_multipole_types(const int& level) -> std::vector<MPI_Datatype>& + { + return transfer_acces.get_send_multipole_types(level); + } + + // auto inline print_send_multipole_types(const int& level) -> void + // { + // auto const& type = m_send_multipoles_type[level]; + // for(int p = 0; p < type.size(); ++p) + // { + // std::cout << " ptr_data_type(" << p << ") " << &(type[p]) << " level: " << level << std::endl + // << std::flush; + // } + // } + auto inline get_receive_multipole_types(const int& level) -> std::vector<MPI_Datatype>& + { + return transfer_acces.get_receive_multipole_types(level); + } + auto inline send_morton_indexes(int const& level, int const& proc) -> std::vector<morton_type>& + { + return transfer_acces.send_morton_indexes(level, proc); + } + auto inline send_morton_indexes(int const& level) -> std::vector<std::vector<morton_type>>& + { + return transfer_acces.send_morton_indexes(level); + } + auto inline receive_cells_access(int const& level) -> std::vector<std::vector<grp_access_type>>& + { + return transfer_acces.receive_cells_access(level); + } + + auto inline send_cells_access(int const& level) -> std::vector<std::vector<grp_access_type>>& + { + return transfer_acces.send_cells_access(level); + } + // auto inline set_receive_access(const int level, std::vector<std::vector<grp_access_type>>& access) -> void + // { + // m_receive_access[level] = std::move(access); + // } + // auto inline get_ptr_send_access(const int& level) -> grp_access_type* + // { + // return m_send_multipoles_type[level].data(); + // } + // auto inline get_ptr_receive_access(const int& level) -> grp_access_type* + // { + // return m_receive_multipoles_type[level].data(); + // } + auto get_up_down_access(const int level) -> UpDownDataAccess& { return up_down_acces[level]; } + auto get_up_down_access(const int level) const -> UpDownDataAccess const& { return up_down_acces[level]; } + private: /** * @brief a reference on the parallel manager @@ -703,6 +791,18 @@ namespace scalfmm::component * */ int m_level_shared; + /// + // Structure for communications in transfert step + + // std::vector<std::vector<MPI_Datatype>> m_send_multipoles_type; + // std::vector<std::vector<MPI_Datatype>> m_receive_multipoles_type; + // // vector per level and per process + // std::vector<std::vector<std::vector<morton_type>>> m_send_morton; + // std::vector<std::vector<std::vector<grp_access_type>>> m_receive_cells_access; + // std::vector<std::vector<std::vector<grp_access_type>>> m_send_cells_access; + + std::vector<UpDownDataAccess> up_down_acces; + transferDataAccess<morton_type, grp_access_type> transfer_acces; }; } // namespace scalfmm::component diff --git a/include/scalfmm/tree/for_each.hpp b/include/scalfmm/tree/for_each.hpp index 449e4da07e3641d38191c56ba753846c9e8ff9b5..67d512422f0fa5b82a8586449d02931e8b17f42e 100644 --- a/include/scalfmm/tree/for_each.hpp +++ b/include/scalfmm/tree/for_each.hpp @@ -138,7 +138,27 @@ namespace scalfmm::component return f; } + /// @brief For iterating on the components between the begin and 
the end iterator + /// @tparam InputTreeIterator + /// @tparam UnaryFunction + /// @param begin + /// @param end + /// @param f + /// @return + template<typename InputTreeIterator, typename UnaryFunction> + inline auto for_each_mine_component(InputTreeIterator begin, InputTreeIterator end, + UnaryFunction f) -> UnaryFunction + { + for(auto group_leaf_iterator_begin = begin; group_leaf_iterator_begin != end; ++group_leaf_iterator_begin) + { + for(auto&& leaf: (*group_leaf_iterator_begin)->components()) + { + f(leaf); + } + } + return f; + } /** * @brief iterate en two (same) leaf struture (same groupe size) * diff --git a/include/scalfmm/tree/group_let.hpp b/include/scalfmm/tree/group_let.hpp index aa52cfc47b87263389a93a20bed6e0c149a96674..1b7e1fd7b639d955bda9146a606d053531cf2f36 100644 --- a/include/scalfmm/tree/group_let.hpp +++ b/include/scalfmm/tree/group_let.hpp @@ -1,9 +1,21 @@ -// -------------------------------- -// See LICENCE file at project root -// File : scalfmm/tree/group_let.hpp -// -------------------------------- -#ifndef SCALFMM_TREE_LET_HPP +#ifndef SCALFMM_TREE_LET_HPP #define SCALFMM_TREE_LET_HPP +#include <algorithm> +#include <array> +#include <fstream> +#include <iostream> +#include <iterator> +#include <string> +#include <tuple> +#include <utility> +#include <vector> + +#include <cpp_tools/colors/colorized.hpp> +#include <cpp_tools/parallel_manager/parallel_manager.hpp> + +#include <scalfmm/tree/utils.hpp> +#include <scalfmm/utils/io_helpers.hpp> // for io::print +#include <scalfmm/utils/math.hpp> #include "scalfmm/container/particle_container.hpp" #include "scalfmm/lists/sequential.hpp" @@ -12,11 +24,8 @@ #include "scalfmm/parallel/mpi/utils.hpp" #include "scalfmm/parallel/utils.hpp" #include "scalfmm/tree/for_each.hpp" -#include "scalfmm/tree/utils.hpp" -#include "scalfmm/utils/io_helpers.hpp" -#include "scalfmm/utils/math.hpp" - #ifdef SCALFMM_USE_MPI + #include <inria/algorithm/distributed/distribute.hpp> #include <inria/algorithm/distributed/mpi.hpp> #include <inria/algorithm/distributed/sort.hpp> @@ -24,33 +33,15 @@ #include <mpi.h> #endif -#include <cpp_tools/colors/colorized.hpp> -#include <cpp_tools/parallel_manager/parallel_manager.hpp> - -#include <algorithm> -#include <array> -#include <fstream> -#include <iostream> -#include <iterator> -#include <string> -#include <tuple> -#include <utility> -#include <vector> - namespace scalfmm::tree { using morton_type = std::int64_t; // typename Tree_type:: - /** - * @brief - * - * @tparam MortonIdxType - */ - template<typename MortonIdxType> + template<typename MortonIdx> struct leaf_info_type { - using morton_type = MortonIdxType; - MortonIdxType morton{}; + using morton_type = MortonIdx; + MortonIdx morton{}; std::size_t number_of_particles{}; friend std::ostream& operator<<(std::ostream& os, const leaf_info_type& w) { @@ -62,18 +53,11 @@ namespace scalfmm::tree namespace let { - /** - * @brief - * - * @tparam BoxType - * @tparam VectorLeafInfoType - * @tparam MortonDistributionType - */ - template<typename BoxType, typename VectorLeafInfoType, typename MortonDistributionType> - inline /*std::vector<morton_type>*/ VectorLeafInfoType - get_ghosts_p2p_interaction(cpp_tools::parallel_manager::parallel_manager& para, BoxType const& box, - std::size_t const& level, int const& separation, VectorLeafInfoType const& leaf_info, - MortonDistributionType const& leaves_distrib) + template<typename Box, typename VectorLeafInfo, typename MortonDistribution> + inline /*std::vector<morton_type>*/ VectorLeafInfo + 
get_ghosts_p2p_interaction(cpp_tools::parallel_manager::parallel_manager& para, Box const& box, + std::size_t const& level, int const& separation, VectorLeafInfo const& leaf_info, + MortonDistribution const& leaves_distrib) { std::vector<morton_type> ghost_to_add; auto const& period = box.get_periodicity(); @@ -83,14 +67,17 @@ namespace scalfmm::tree for(auto const& info: leaf_info) { auto const& morton_index = info.morton; - auto coordinate{index::get_coordinate_from_morton_index<BoxType::dimension>(morton_index)}; + auto coordinate{index::get_coordinate_from_morton_index<Box::dimension>(morton_index)}; auto interaction_neighbors = index::get_neighbors(coordinate, level, period, separation); auto& list = std::get<0>(interaction_neighbors); auto nb = std::get<1>(interaction_neighbors); int it{0}; + //io::print("rank(" + std::to_string(rank) + ") list idx(p2p) : ", list); while(list[it] < my_distrib[0]) { + // std::cout << "INSIDE left idx " << list[it] << " " << std::boolalpha + // << parallel::utils::is_inside_distrib(list[it], leaves_distrib) << std::endl; if(parallel::utils::is_inside_distrib_left(list[it], rank, leaves_distrib)) { ghost_to_add.push_back(list[it]); @@ -100,6 +87,8 @@ namespace scalfmm::tree it = nb - 1; while(list[it] >= my_distrib[1]) { + // std::cout << "INSIDE right idx " << list[it] << " " << std::boolalpha + // << parallel::utils::is_inside_distrib(list[it], leaves_distrib) << std::endl; if(parallel::utils::is_inside_distrib_right(list[it], rank, leaves_distrib)) { ghost_to_add.push_back(list[it]); @@ -110,7 +99,7 @@ namespace scalfmm::tree std::sort(ghost_to_add.begin(), ghost_to_add.end()); auto last = std::unique(ghost_to_add.begin(), ghost_to_add.end()); ghost_to_add.erase(last, ghost_to_add.end()); - VectorLeafInfoType ghost_leaf_to_add(ghost_to_add.size()); + VectorLeafInfo ghost_leaf_to_add(ghost_to_add.size()); for(int i = 0; i < ghost_to_add.size(); ++i) { ghost_leaf_to_add[i] = {ghost_to_add[i], 0}; @@ -118,32 +107,25 @@ namespace scalfmm::tree return ghost_leaf_to_add; } - - /** - * @brief get theoretical m2l interaction list outside me - * - * We return the list of indexes of cells involved in P2P interaction that we do - * not have locally. The cells on other processors may not exist. - * - * @tparam BoxType - * @tparam VectorMortonIdxType - * @tparam MortonDistributionType - * @param[in] para the parallel manager - * @param box - * @param level - * @param separation - * @param local_morton_vect - * @param cell_distrib the cells distribution on the processes - * @return the list of indexes on tother processes - */ - template<typename BoxType, typename VectorMortonIdxType, typename MortonDistributionType> - inline VectorMortonIdxType get_ghosts_m2l_interaction(cpp_tools::parallel_manager::parallel_manager& para, - BoxType const& box, const std::size_t& level, - int const& separation, - VectorMortonIdxType const& local_morton_vect, - MortonDistributionType const& cell_distrib) + /// + /// \brief get theoretical m2l interaction list outside me + /// + /// We return the list of indexes of cells involved in P2P interaction that we do + /// not have locally. The cells on other processors may not exist. 
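Both ghost-construction helpers in `group_let.hpp` follow the same scheme: for each local Morton index, build the theoretical interaction list, keep the entries that fall outside the local interval but inside another rank's interval, then sort and deduplicate. A condensed stand-alone sketch of that filtering step (half-open `[begin, end)` intervals assumed; the interaction list is taken as given rather than computed from neighbour coordinates):

```cpp
// Keep the interaction-list entries that are not mine but belong to some other rank,
// then sort and deduplicate, as the p2p/m2l ghost routines do.
#include <algorithm>
#include <array>
#include <cstdint>
#include <iostream>
#include <vector>

using morton_type = std::int64_t;
using interval_type = std::array<morton_type, 2>;   // [begin, end) owned by one rank

std::vector<morton_type> ghosts_outside_me(std::vector<morton_type> const& interaction_list,
                                           std::vector<interval_type> const& distrib, int my_rank)
{
    std::vector<morton_type> ghosts;
    auto const& mine = distrib[my_rank];
    for(auto idx: interaction_list)
    {
        if(idx >= mine[0] && idx < mine[1])
        {
            continue;   // local cell, not a ghost
        }
        for(std::size_t r = 0; r < distrib.size(); ++r)
        {
            if(r != static_cast<std::size_t>(my_rank) && idx >= distrib[r][0] && idx < distrib[r][1])
            {
                ghosts.push_back(idx);   // exists (at least theoretically) on rank r
                break;
            }
        }
    }
    std::sort(ghosts.begin(), ghosts.end());
    ghosts.erase(std::unique(ghosts.begin(), ghosts.end()), ghosts.end());
    return ghosts;
}

int main()
{
    std::vector<interval_type> distrib{{0, 16}, {16, 40}, {40, 64}};
    std::vector<morton_type> m2l_list{12, 15, 18, 22, 39, 41, 70};
    for(auto g: ghosts_outside_me(m2l_list, distrib, /*my_rank*/ 1)) { std::cout << g << ' '; }   // 12 15 41
    std::cout << '\n';
}
```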
+ /// + /// \param[in] para the parallel manager + /// \param tree the tree used to compute the interaction + /// \param local_morton_idx the local morton index of the cells + /// \param cell_distrib the cells distribution on the processes + /// \return the list of indexes on tother processes + /// + template<typename Box, typename VectorMortonIdx, typename MortonDistribution> + inline VectorMortonIdx + get_ghosts_m2l_interaction(cpp_tools::parallel_manager::parallel_manager& para, Box const& box, + const std::size_t& level, int const& separation, + VectorMortonIdx const& local_morton_vect, MortonDistribution const& cell_distrib) { - VectorMortonIdxType ghost_to_add; + VectorMortonIdx ghost_to_add; auto const& period = box.get_periodicity(); const auto rank = para.get_process_id(); auto const my_distrib = cell_distrib[rank]; @@ -152,36 +134,64 @@ namespace scalfmm::tree for(auto morton_index: local_morton_vect) { // for each index in the vector of cells in local_morton_vect we compute the m2l interactions - auto coordinate{index::get_coordinate_from_morton_index<BoxType::dimension>(morton_index)}; + auto coordinate{index::get_coordinate_from_morton_index<Box::dimension>(morton_index)}; auto interaction_m2l_list = index::get_m2l_list(coordinate, level, period, separation); auto& list = std::get<0>(interaction_m2l_list); auto nb = std::get<2>(interaction_m2l_list); + // + // io::print("rank(" + std::to_string(rank) + ") list idx(m2l) : ", list); + // io::print("rank(" + std::to_string(rank) + ") my_distrib : ", my_distrib); int it{0}; // We check if the cells are in the distribution for(auto it = 0; it < nb; ++it) { + // if(list[it] > my_distrib[0]) + // std::cout << list[it] << " " << std::boolalpha + // << math::between(list[it], my_distrib[0], my_distrib[1]) << std::endl; + if(math::between(list[it], my_distrib[0], my_distrib[1])) { break; } bool check{false}; + // for(int i = 0; i < rank; ++i) for(int i = rank - 1; i >= 0; i--) { auto const& interval = cell_distrib[i]; + // // if(rank == 2) + // { + // std::cout << "parallel::utils::is_inside_distrib_left list[it]: " << interval[0] << " < " + // << list[it] + // << " < " << interval[1] << std::endl; + // } check = math::between(list[it], interval[0], interval[1]); if(check) { break; } } + // std::cout << " " << list[it] << " " << std::boolalpha << check << std::endl; if(check) // parallel::utils::is_inside_distrib_left(list[it], rank, cell_distrib)) { ghost_to_add.push_back(list[it]); } } - + // while(list[it] < my_distrib[0]) + // { + // std::cout << it << " INSIDE left idx " << list[it] << " " << std::boolalpha + // << parallel::utils::is_inside_distrib(list[it], cell_distrib) << std::endl; + // if(parallel::utils::is_inside_distrib_left(list[it], rank, cell_distrib)) + // { + // ghost_to_add.push_back(list[it]); + // } + // ++it; + // if(it > nb) + // { + // break; + // } + // } it = nb - 1; if(not last_proc) // No ghost on the right on last process { @@ -194,22 +204,20 @@ namespace scalfmm::tree --it; } } + // if(rank == 2) + // { + // io::print("rank(" + std::to_string(rank) + ") tmp ghost_to_add(m2l) : ", ghost_to_add); + // } } std::sort(ghost_to_add.begin(), ghost_to_add.end()); auto last = std::unique(ghost_to_add.begin(), ghost_to_add.end()); ghost_to_add.erase(last, ghost_to_add.end()); + // io::print("rank(" + std::to_string(rank) + ") cell_distrib: ", cell_distrib); + // io::print("rank(" + std::to_string(rank) + ") ghost_to_add(m2l): ", ghost_to_add); return ghost_to_add; } - /** - * @brief - * - * @tparam 
VectorLeafInfoType - * @param localLeaves - * @param ghosts - * @return auto - */ template<typename VectorLeafInfoType> auto merge_split_structure(VectorLeafInfoType const& localLeaves, VectorLeafInfoType const& ghosts) { @@ -257,7 +265,6 @@ namespace scalfmm::tree return std::make_tuple(morton, number_of_particles); } - /** * @brief Split the LeafInfo structure in two vectors (Morton, number_of_particles) * @@ -282,15 +289,6 @@ namespace scalfmm::tree } return std::make_tuple(morton, number_of_particles); } - - /** - * @brief - * - * @tparam VectorLeafInfoIteratorType - * @param begin - * @param end - * @return auto - */ template<typename VectorLeafInfoIteratorType> auto split_structure(const VectorLeafInfoIteratorType begin, const VectorLeafInfoIteratorType end) { @@ -319,8 +317,8 @@ namespace scalfmm::tree * @return the vector ot the three blocs */ template<typename VectorBlockType> - auto merge_blocs(VectorBlockType const& bloc1, VectorBlockType const& bloc2, - VectorBlockType const& bloc3) -> VectorBlockType + VectorBlockType merge_blocs(VectorBlockType const& bloc1, VectorBlockType const& bloc2, + VectorBlockType const& bloc3) { // Merge the three block structure auto size = bloc1.size() + bloc2.size() + bloc3.size(); @@ -341,38 +339,41 @@ namespace scalfmm::tree } return all_blocks; } - /** * @brief Construct the M2M ghost for the current level * * The routine check if there is ghosts during the M2M operation. * If yes, we exchange the ghost indexes - * @tparam BoxType - * @tparam VectorMortonIdxType - * @tparam MortonDistributionType + * @tparam Box + * @tparam VectorMortonIdx + * @tparam MortonDistribution * @param para the parallel manager * @param box the simulation box * @param level the current level * @param local_morton_vect * @param cells_distrib teh distribution of cells * @param top if top is true nothing is down - * @return VectorMortonIdxType + * @return VectorMortonIdx */ - template<typename BoxType, typename VectorMortonIdxType, typename MortonDistributionType> - [[nodiscard]] auto build_ghost_m2m_let_at_level(cpp_tools::parallel_manager::parallel_manager& para, - BoxType& box, const int& level, - const VectorMortonIdxType& local_morton_vect, - const MortonDistributionType& cells_distrib, - bool top = false) -> VectorMortonIdxType + template<typename Box, typename VectorMortonIdx, typename MortonDistribution> + [[nodiscard]] auto build_ghost_m2m_let_at_level(cpp_tools::parallel_manager::parallel_manager& para, Box& box, + const int& level, VectorMortonIdx const& local_morton_vect, + MortonDistribution const& cells_distrib, + bool top = false) -> VectorMortonIdx { - using morton_type = typename VectorMortonIdxType::value_type; - static constexpr int nb_children = math::pow(2, BoxType::dimension); - VectorMortonIdxType ghosts; + using morton_type = typename VectorMortonIdx::value_type; + static constexpr int nb_children = math::pow(2, Box::dimension); + VectorMortonIdx ghosts; + std::clog << " begin build_ghost_m2m_let_at_level " << level << std::endl; + io::print(std::clog, "local_morton_vect: ", local_morton_vect); + std::clog << std::endl; + if(top) return ghosts; const auto rank = para.get_process_id(); const auto proc = para.get_num_processes(); auto comm = para.get_communicator(); + // parallel::utils::print_distrib("level_dist[" + std::to_string(level) + "]: ", rank, cells_distrib); cpp_tools::parallel_manager::mpi::request mpi_status_left, mpi_status_right; @@ -385,20 +386,26 @@ namespace scalfmm::tree // parallel::utils::print_distrib("level_dist[leaf_level]): 
", rank, cells_distrib); bool comm_left{false}, comm_right{false}; // Check on left + std::clog << "check left\n "; + if(rank > 0) { auto first_index = local_morton_vect[0]; - auto parent_first = first_index >> BoxType::dimension; - auto last_parent_previous_proc = cells_distrib[rank - 1][1] >> BoxType::dimension; + auto parent_first = first_index >> Box::dimension; + auto last_parent_previous_proc = (cells_distrib[rank - 1][1] - 1) >> Box::dimension; + // std::clog << "index : " << first_index << " Parent ! " << parent_first << " " + // << last_parent_previous_proc << std::endl; if(parent_first == last_parent_previous_proc) { comm_left = true; + // std::cout << "Need to exchange between " << rank << " and " << rank - 1 << std::endl; int idx{1}; send[idx] = local_morton_vect[0]; for(int i = 1; i < std::min(nb_children, int(local_morton_vect.size())); ++i) { - auto parent_index = local_morton_vect[i] >> BoxType::dimension; + auto parent_index = local_morton_vect[i] >> Box::dimension; + // std::cout << "index : " << local_morton_vect[i] << " Parent ! " << parent_first << " " << last_parent_previous_proc << std::endl; if(parent_index == last_parent_previous_proc) { ++idx; @@ -415,22 +422,23 @@ namespace scalfmm::tree comm.isend(send.data(), nb_children, mpi_type, rank - 1, tag); } } - // std::cout << "check right\n "; + // std::clog << "check right\n "; auto last_index = local_morton_vect[local_morton_vect.size() - 1]; - auto parent_last = last_index >> BoxType::dimension; - // std::cout << "last_index " << last_index << " parent_last " << parent_last <<std::endl; + auto parent_last = last_index >> Box::dimension; + // std::clog << "last_index " << last_index << " parent_last " << parent_last << std::endl; ghosts.resize(0); if(rank < proc - 1) { // check on left - auto first_parent_next_proc = cells_distrib[rank + 1][0] >> BoxType::dimension; - // std::cout << "Parent ! " << parent_last << " " << first_parent_next_proc << std::endl; + auto first_parent_next_proc = cells_distrib[rank + 1][0] >> Box::dimension; + // std::clog << "Parent ! " << parent_last << " " << first_parent_next_proc << std::endl; if(parent_last == first_parent_next_proc) { comm_right = true; // std::cout << "Need to exchange between " << rank << " and " << rank + 1 << std::endl; /*mpi_status_right =*/comm.recv(recv.data(), nb_children, mpi_type, rank + 1, tag); - + // cpp_tools::parallel_manager::mpi::request::waitall(1, &mpi_status_right); + // io::print("recv ",recv ); ghosts.resize(recv[0]); for(int i = 0; i < ghosts.size(); ++i) { @@ -438,42 +446,43 @@ namespace scalfmm::tree } } } - + // io::print(std::clog, "m2m ghosts ", ghosts); + // std::clog << " end build_ghost_m2m_let_at_level" << std::endl; return ghosts; } - - /** - * @brief construct the local essential tree (LET) at the level. - * - * We start from a given Morton index distribution and we compute all - * interactions needed - * in the algorithm steps. - * At the leaf level it corresponds to the interactions coming from the - * direct pass (P2P operators) - * and in the transfer pass (M2L operator). For the other levels we - * consider only the M2L interactions. 
- * The leaves_distrib and the cells_distrib might be different - * At the end the let has also all the interaction list computed - * - * @tparam BoxType - * @tparam VectorMortonIdxType - * @tparam MortonDistributionType - * @param para - * @param box - * @param[in] level the level to construct the let - * @param local_morton_vect - * @param[in] cells_distrib the morton index distribution for - * the cells at the leaf level. - * @param separation - * @return VectorMortonIdxType - */ - template<typename BoxType, typename VectorMortonIdxType, typename MortonDistributionType> - [[nodiscard]] auto build_let_at_level(cpp_tools::parallel_manager::parallel_manager& para, BoxType& box, - const int& level, const VectorMortonIdxType& local_morton_vect, - const MortonDistributionType& cells_distrib, - const int& separation) -> VectorMortonIdxType + /// + /// \brief construct the local essential tree (LET) at the level. + /// + /// We start from a given Morton index distribution and we compute all + /// interactions needed + /// in the algorithm steps. + /// At the leaf level it corresponds to the interactions coming from the + /// direct pass (P2P operators) + /// and in the transfer pass (M2L operator). For the other levels we + /// consider only the M2L interactions. + /// The leaves_distrib and the cells_distrib might be different + /// At the end the let has also all the interaction list computed + /// + /// \param[inout] tree the tree to compute the let. + /// \param[in] local_morton_idx the morton index of the particles in the + /// processors. + /// + /// + /// \param[in] cells_distrib the morton index distribution for + /// the cells at the leaf level. + /// + /// \param[in] level the level to construct the let + /// + template<typename Box, typename VectorMortonIdx, typename MortonDistribution> + [[nodiscard]] auto build_let_at_level(cpp_tools::parallel_manager::parallel_manager& para, Box& box, + const int& level, const VectorMortonIdx& local_morton_vect, + const MortonDistribution& cells_distrib, + const int& separation) -> VectorMortonIdx { const auto my_rank = para.get_process_id(); + // std::cout << cpp_tools::colors::red << " --> Begin let::build_let_at_level() at level = " << level + // << "dist: " << cells_distrib[my_rank] << cpp_tools::colors::reset << std::endl; + // io::print("rank(" + std::to_string(my_rank) + ") local_morton_vect : ", local_morton_vect); // we compute the cells needed in the M2L operator @@ -485,10 +494,15 @@ namespace scalfmm::tree std::cout << std::flush; /// Look if the morton index really exists in the distributed tree parallel::utils::check_if_morton_index_exist(para, needed_idx, cells_distrib, local_morton_vect); + /// + // io::print("rank(" + std::to_string(my_rank) + ") check_if_morton_index_exist(m2l) : ", needed_idx); + // + // std::cout << cpp_tools::colors::red + // << "rank(" + std::to_string(my_rank) + ")-- > End let::build_let_at_level() " + // << cpp_tools::colors::reset << std::endl; return needed_idx; } - // template<typename OctreeTree, typename VectorMortonIdx, typename MortonDistribution> // void build_let_at_level(cpp_tools::parallel_manager::parallel_manager& para, OctreeTree& tree, // const VectorMortonIdx& local_morton_idx, const MortonDistribution& cells_distrib, @@ -516,13 +530,12 @@ namespace scalfmm::tree // std::cout << cpp_tools::colors::green << " --> End let::build_let_at_level() at level = " << level // << cpp_tools::colors::reset << std::endl; // } - /** * @brief * - * @tparam BoxType - * @tparam VectorMortonIdxType - * @tparam 
MortonDistributionType + * @tparam Box + * @tparam VectorMortonIdx + * @tparam MortonDistribution * @param para * @param box * @param level @@ -530,15 +543,19 @@ namespace scalfmm::tree * @param leaves_distrib * @param separation */ - template<typename BoxType, typename VectorLeafInfo, typename MortonDistributionType> - [[nodiscard]] auto build_let_leaves(cpp_tools::parallel_manager::parallel_manager& para, BoxType const& box, + template<typename Box, typename VectorLeafInfo, typename MortonDistribution> + [[nodiscard]] auto build_let_leaves(cpp_tools::parallel_manager::parallel_manager& para, Box const& box, const std::size_t& level, const VectorLeafInfo& leaf_info /*local_morton_vect*/, - MortonDistributionType const& leaves_distrib, const int& separation) + MortonDistribution const& leaves_distrib, const int& separation) -> VectorLeafInfo { auto my_rank = para.get_process_id(); + // std::cout << cpp_tools::colors::green + // << "rank(" + std::to_string(my_rank) + ") --> Begin let::build_let_leaves() " + // << cpp_tools::colors::reset << std::endl; + // io::print("rank(" + std::to_string(my_rank) + ") leaf_info : ", leaf_info); // we compute the leaves involved in the P2P operators auto leaf_info_to_add = @@ -554,7 +571,9 @@ namespace scalfmm::tree /// needed_idx input contains the Morton index of leaf /// output contains the number of particles in the leaf + // io::print("rank(" + std::to_string(my_rank) + ") 1 leaf_info_to_add(p2p) : ", leaf_info_to_add); parallel::utils::check_if_leaf_morton_index_exist(para, needed_idx, leaves_distrib, leaf_info); + // io::print("rank(" + std::to_string(my_rank) + ") check needed_idx.size : ", needed_idx); int idx{0}; for(int i = 0; i < needed_idx.size(); ++i) @@ -571,49 +590,54 @@ namespace scalfmm::tree auto last = leaf_info_to_add.cbegin() + idx; leaf_info_to_add.erase(last, leaf_info_to_add.end()); } - + /// + // io::print("rank(" + std::to_string(my_rank) + ") final leaf_info_to_add(p2p) : ", leaf_info_to_add); + // std::cout << cpp_tools::colors::green + // << "rank(" + std::to_string(my_rank) + ")-- > End let::build_let_leaves() " + // << cpp_tools::colors::reset << std::endl; return leaf_info_to_add; } - /** - * @brief buildLetTree Build the let of the tree and the leaves and cells distributions - * - * The algorithm has 5 steps: - * 1) We sort the particles according to their Morton Index (leaf level) - * 2) Build the leaf morton vector of my local particles and construct either - * the leaves distribution or the cell distribution according to parameter - * use_leaf_distribution or use_particle_distribution - * 3) Fit the particles inside the use_leaf_distribution - * 4) Construct the tree according to my particles and build the leaf - * morton vector of my local particles - * 5) Constructing the let level by level - * - * @tparam TreeType - * @tparam VectorType - * @tparam BoxType - * @param[in] manager the parallel manager - * @param[in] number_of_particles total number of particles in the simulation - * @param[in] particle_container vector of particles on my node. On output the array is sorted and correspond to teh distribution built - * @param[in] box size of the simulation box - * @param[in] leaf_level level of the leaf in the tree - * @param[in] level_shared the level at which cells are duplicated on processors. If the level is negative, - * nothing is duplicated. 
- * @param[in] groupSizeLeaves blocking parameter for the leaves (particles) - * @param[in] groupSizeCells blocking parameter for the cells - * @param[in] order order of the approximation to build the tree - * @param[in] use_leaf_distribution to say if you consider the leaf distribution - * @param[in] use_particle_distribution to say if you consider the particle distribution - * @return localGroupTree the LET of the octree processors - */ - template<typename TreeType, typename VectorType, typename BoxType> - auto buildLetTree(cpp_tools::parallel_manager::parallel_manager& manager, - const std::size_t& number_of_particles, VectorType& particle_container, const BoxType& box, - const int& leaf_level, const int& level_shared, const int groupSizeLeaves, - const int groupSizeCells, const int order, const int separation, - const bool use_leaf_distribution, const bool use_particle_distribution) -> TreeType + /// + /// \brief buildLetTree Build the let of the tree and the leaves and cells distributions + /// + /// The algorithm has 5 steps: + /// 1) We sort the particles according to their Morton Index (leaf level) + /// 2) Build the leaf morton vector of my local particles and construct either + /// the leaves distribution or the cell distribution according to parameter + /// use_leaf_distribution or use_particle_distribution + /// 3) Fit the particles inside the use_leaf_distribution + /// 4) Construct the tree according to my particles and build the leaf + /// morton vector of my local particles + /// 5) Constructing the let level by level + /// + /// \param[in] manager the parallel manager + /// \param[in] number_of_particles total number of particles in the simulation + /// \param[in] particle_container vector of particles on my node. On output the + /// array is sorted and correspond to teh distribution built + /// \param[in] box size of the simulation box + /// \param[in] leaf_level level of the leaf in the tree + /// \param[in] level_shared the level at which cells are duplicated on processors. If the level is negative, + /// nothing is duplicated. 
+ /// \param[in] groupSizeLeaves blocking parameter for the leaves (particles) + /// \param[in] groupSizeCells blocking parameter for the cells + /// @param[in] order order of the approximation to build the tree + /// @param[in] use_leaf_distribution to say if you consider the leaf distribution + /// @param[in] use_particle_distribution to say if you consider the particle distribution + /// @return localGroupTree the LET of the octree + + /// processors + template<typename Tree_type, typename Vector_type, typename Box_type> + Tree_type + buildLetTree(cpp_tools::parallel_manager::parallel_manager& manager, const std::size_t& number_of_particles, + Vector_type& particle_container, const Box_type& box, const int& leaf_level, + const int& level_shared, const int groupSizeLeaves, const int groupSizeCells, const int order, + const int separation, const bool use_leaf_distribution, const bool use_particle_distribution) { + // std::cout << cpp_tools::colors::green << " --> Begin let::group_let() " << cpp_tools::colors::reset + // << std::endl; // - static constexpr std::size_t dimension = VectorType::value_type::dimension; + static constexpr std::size_t dimension = Vector_type::value_type::dimension; const auto rank = manager.get_process_id(); //////////////////////////////////////////////////////////////////////////// /// Sort the particles at the leaf level according to their Morton index @@ -646,27 +670,28 @@ namespace scalfmm::tree { particleMortonIndex[part] = scalfmm::index::get_morton_index(particle_container[part].position(), box, leaf_level); - // std::cout << part << " m " << particleMortonIndex[part] << particle_container[part] << std::endl; + // std::cout << part << " m " << particleMortonIndex[part] << particle_container[part] << std::endl; } auto leafMortonIdx(particleMortonIndex); // delete duplicate indexes auto last = std::unique(leafMortonIdx.begin(), leafMortonIdx.end()); leafMortonIdx.erase(last, leafMortonIdx.end()); /////////////////////////////////////////////////////////////////////////////////// - io::print("rank(" + std::to_string(rank) + ") --> init leafMortonIdx: ", leafMortonIdx); + // io::print("rank(" + std::to_string(rank) + ") --> init leafMortonIdx: ", leafMortonIdx); /// //////////////////////////////////////////////////////////////////////////////////////////// //// Construct a uniform distribution for the leaves/cells at the leaves level /// /// A morton index should be own by only one process /// - using morton_distrib_type = typename TreeType::data_distrib_type; + using morton_distrib_type = typename Tree_type::data_distrib_type; /// /// Build a uniform distribution of the leaves/cells /// Here the distribution is a closed interval and not semi open one !!! 
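+        /// (illustrative values only: with two processes and leaf Morton indices 0..63, such a
+        ///  closed distribution would look like { [0, 31], [32, 63] })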
/// morton_distrib_type leaves_distrib; + morton_distrib_type particles_distrib(manager.get_num_processes()); if(use_leaf_distribution) { leaves_distrib = std::move(scalfmm::parallel::utils::balanced_leaves(manager, leafMortonIdx)); @@ -675,6 +700,7 @@ namespace scalfmm::tree interval[1] += 1; } } + // io::print("rank(" + std::to_string(rank) + ") --> leaves_distrib: ", leaves_distrib); //// End //////////////////////////////////////////////////////////////////////////////////////////// /// @@ -685,7 +711,6 @@ namespace scalfmm::tree /// /// A morton index should be own by only one process /// - morton_distrib_type particles_distrib(manager.get_num_processes()); if(use_particle_distribution) { particles_distrib = std::move(scalfmm::parallel::utils::balanced_particles( @@ -721,13 +746,14 @@ namespace scalfmm::tree /// parallel::utils::fit_particles_in_distrib(manager, particle_container, particleMortonIndex, particles_distrib, box, leaf_level, number_of_particles); + // io::print("rank(" + std::to_string(rank) + ") --> particle_container: ", particle_container); /// All the particles are located on the good process //////////////////////////////////////////////////////////////////////////////////////////// /// /// Construct the local tree based on our set of particles // Build and empty tree - TreeType localGroupTree(manager, static_cast<std::size_t>(leaf_level + 1), level_shared, order, - groupSizeLeaves, groupSizeCells, box); + Tree_type localGroupTree(manager, static_cast<std::size_t>(leaf_level + 1), level_shared, order, + groupSizeLeaves, groupSizeCells, box); /// Set true because the particles are already sorted /// In fact we have all the leaves to add in leafMortonIdx - could be used to construct /// the tree !!! @@ -735,6 +761,9 @@ namespace scalfmm::tree #ifdef SCALFMM_USE_MPI + // std::cout << cpp_tools::colors::red; + // io::print("rank(" + std::to_string(rank) + ") leafMortonIdx: ", leafMortonIdx); + // std::cout << cpp_tools::colors::reset << std::endl; /// End //////////////////////////////////////////////////////////////////////////////////////////// /// @@ -746,6 +775,7 @@ namespace scalfmm::tree { leafMortonIdx[i] = scalfmm::index::get_morton_index(particle_container[i].position(), box, leaf_level); } + // io::print("rank(" + std::to_string(rank) + ") --> leafMortonIdx: ", leafMortonIdx); // localLeafInfo contains information on leaves (morton, number of particles) own by th current process std::vector<tree::leaf_info_type<morton_type>> localLeafInfo(leafMortonIdx.size()); @@ -770,6 +800,8 @@ namespace scalfmm::tree } leafMortonIdx.resize(idx + 1); localLeafInfo.resize(leafMortonIdx.size()); + // io::print("rank(" + std::to_string(rank) + ") --> localLeafInfo: ", localLeafInfo); + // io::print("rank(" + std::to_string(rank) + ") --> leafMortonIdx: ", leafMortonIdx); //////////////////////////////////////////////////////////////////////////////////////// // Build the pointer of the tree with all parameters @@ -783,9 +815,17 @@ namespace scalfmm::tree auto ghostP2P_leafInfo = build_let_leaves(manager, box, leaf_level, localLeafInfo, particles_distrib, separation); + // io::print("rank(" + std::to_string(rank) + ") --> final ghostP2P_leafInfo: ", + // ghostP2P_leafInfo); io::print("rank(" + std::to_string(rank) + ") --> final localLeafInfo: ", + // localLeafInfo); localGroupTree.set_leaf_distribution(particles_distrib); + // std::cout << std::flush; + // std::cout << cpp_tools::colors::red; + // std::cout << "END LEAF LEVEL " << std::endl; + // std::cout << 
cpp_tools::colors::reset; + /// If the distribution is not the same for the leaf and the cell we redistribute the /// morton index according to the uniform distribution of morton index /// @@ -817,41 +857,66 @@ namespace scalfmm::tree auto ghost_m2l_cells = build_let_at_level(manager, box, leaf_level, leafMortonIdx, level_dist[leaf_level], separation); + // io::print("rank(" + std::to_string(rank) + ") --> final ghost_cells(m2l): ", ghost_m2l_cells); auto ghost_m2m_cells = build_ghost_m2m_let_at_level(manager, box, leaf_level, leafMortonIdx, level_dist[leaf_level]); + // io::print("rank(" + std::to_string(rank) + ") --> ghost_cells(m2m): ", ghost_m2m_cells); // distribution, particles + // std::cout << " $$$$$$$$$$$$$$$$$$$$$$$$$ leaf level " << leaf_level << " $$$$$$$$$$$$$$$$$$$$$$$$$ " + // << std::endl; localGroupTree.create_from_leaf_level(localLeafInfo, ghostP2P_leafInfo, ghost_m2l_cells, ghost_m2m_cells, particles_distrib[rank], level_dist[leaf_level][rank]); + // std::cout << " $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ leaf level $$$$$$$$$$$$$$$$$$$$$$$$$$ " + // << std::endl; + // parallel::utils::print_distrib("leaf_cell distribution ", rank, level_dist[leaf_level]); // build all leaves between leaf_level - 1 and level_shared -1. // we use the maximum because if we don't share certain levels this number is <0 + // std::cout << "std::max(level_shared, int(localGroupTree.top_level())) " + // << std::max(level_shared, int(localGroupTree.top_level())) << std::endl; + // std::cout << " XXXXXXXXXX -> std::max(level_shared, int(localGroupTree.top_level() - 1))" + // << std::max(level_shared, int(localGroupTree.top_level() - 1)) << std::endl; + ; for(int level = leaf_level - 1; level >= localGroupTree.top_level(); --level) { + // std::cout << " $$$$$$$$$$$$$$$$$$$$$$$$$ level " << level << " $$$$$$$$$$$$$$$$$$$$$$$$$ " + // << std::endl; std::int64_t ghost_l2l_cell{-1}; // Get the distribution at the current level, the ghost cell involved in l2l operator // and the morton index of the existing cells at this level level_dist[level] = std::move(parallel::utils::build_upper_distribution( manager, dimension, level, leafMortonIdx, ghost_l2l_cell, level_dist[level + 1])); + // io::print("rank(" + std::to_string(rank) + ") MortonIdx(" + std::to_string(level) + "): ", + // leafMortonIdx); + // std::cout << " ghost_l2l_cell: " << ghost_l2l_cell << std::endl; // Set the distribution in tres tree localGroupTree.set_cell_distribution(level, level_dist[level]); // build the m2l ghost cells at this level auto ghost_cells_level = build_let_at_level(manager, box, level, leafMortonIdx, level_dist[level], separation); + // io::print("rank(" + std::to_string(rank) + ") level=" + std::to_string(level) + + // " --> final ghost_cells(m2l): ", + // ghost_cells_level); // build the m2m ghost cells at this level auto ghost_m2m_cells = build_ghost_m2m_let_at_level( manager, box, leaf_level, leafMortonIdx, level_dist[level], level == localGroupTree.top_level()); - // io::print("rank(" + std::to_string(rank) + ") --> ghost_cells(m2m): ", ghost_m2m_cells); + // io::print("rank(" + std::to_string(rank) + ") --> ghost_cells(m2m): ", ghost_m2m_cells); + // Create the groupe of cells structure for this level localGroupTree.create_cells_at_level(level, leafMortonIdx, ghost_cells_level, ghost_m2m_cells, ghost_l2l_cell, level_dist[level][rank]); + // std::cout << " $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ " << std::endl + // << std::flush; } + // std::cout << " end loop\n" << std::flush; 
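+            // Summary of the loop above: for each level from leaf_level - 1 up to
+            // localGroupTree.top_level() we (1) derive the cell distribution of the level from
+            // the one below (build_upper_distribution, which also returns the l2l ghost cell),
+            // (2) store it in the tree (set_cell_distribution), (3) collect the M2L ghosts
+            // (build_let_at_level) and the M2M ghosts (build_ghost_m2m_let_at_level), and
+            // (4) create the groups of cells for the level (create_cells_at_level).
+            // The barrier below ends the MPI-specific part of the construction.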
manager.get_communicator().barrier(); + // std::cout << " end barrier\n" << std::flush; } else #endif // SCALFMM_USE_MPI @@ -867,6 +932,7 @@ namespace scalfmm::tree localGroupTree.construct(particleMortonIndex); // then, we fill each leaf with its particles (the particle container is sorted ) localGroupTree.fill_leaves_with_particles(particle_container); + // localGroupTree.set_leaf_distribution(particles_distrib); localGroupTree.set_cell_distribution(leaf_level, leaves_distrib); @@ -879,8 +945,17 @@ namespace scalfmm::tree } } + // std::cout << cpp_tools::colors::red << std::endl << std::flush; + // std::cout << "set iterators \n" << std::flush << std::flush; localGroupTree.set_valid_iterators(true); + // std::cout << "begin fill_leaves_with_particles \n" << std::flush; localGroupTree.fill_leaves_with_particles(particle_container); + // std::cout << "end fill_leaves_with_particles \n" << std::flush; + + // std::cout << cpp_tools::colors::reset << std::endl; + // std::cout << cpp_tools::colors::green << " --> End let::group_let() " << cpp_tools::colors::reset + // << std::endl + // << std::flush; return localGroupTree; } diff --git a/include/scalfmm/tree/group_tree_view.hpp b/include/scalfmm/tree/group_tree_view.hpp index 500ec1c22acf194a049502cd13215dab7a064a3d..b5d0c1a5b604571632c7ea48a9431d11d8bd054e 100644 --- a/include/scalfmm/tree/group_tree_view.hpp +++ b/include/scalfmm/tree/group_tree_view.hpp @@ -911,7 +911,7 @@ namespace scalfmm::component } ++group_index; } -#ifndef _DEBUG_BLOCK_DATA +#ifdef _DEBUG_BLOCK_DATA std::clog << " FINAl block\n"; int tt{0}; for(auto pg: m_group_of_leaves) @@ -952,16 +952,31 @@ namespace scalfmm::component } } - /** - * @brief Resets all particles (positions, inputs, outputs and variables). - * - */ - inline auto reset_particles() -> void + /// @brief reset all outputs in particle structure + /// + /// @return + inline auto number_particles() -> std::size_t + { + std::size_t nb{0}; + for(auto it = this->begin_mine_leaves(); it != end_mine_leaves(); ++it) + { + // lnumber of particles + nb += (*it)->storage().size(); + } + return nb; + } + /// @brief reset all outputs in particle structure + /// + /// @return + inline auto reset_particles() { - // loop on group of leaves for(auto pg: m_group_of_leaves) { - pg->storage().reset_particles(); + // loop on leaves + for(auto& leaf: pg->block()) + { + leaf.particles().clear(); + } } } diff --git a/include/scalfmm/tree/io.hpp b/include/scalfmm/tree/io.hpp index 64753b5cfbb3d35f28424e2430da3b54e706584a..2a555856614177f6b71ea4524bb50d96f76e06ac 100644 --- a/include/scalfmm/tree/io.hpp +++ b/include/scalfmm/tree/io.hpp @@ -106,17 +106,17 @@ namespace scalfmm::io template<typename TreeType> inline auto trace(std::ostream& os, const TreeType& tree, const std::size_t level_trace = 0) -> void { - std::cout << "Trace of the group tree\n"; + std::cout << "Trace of the tree\n"; auto level_0 = []() {}; auto level_1 = [&tree, &os]() { - os << "group_tree | height = " << tree.height() << '\n'; - os << "group_tree | order = " << tree.order() << '\n'; - os << "group_tree | Blocking group size for leaves = " << tree.group_of_leaf_size() << '\n'; - os << "group_tree | Blocking group size for cells = " << tree.group_of_cell_size() << '\n'; - os << "group_tree | number of leaves group = " << tree.leaf_groups_size() << '\n'; + os << "group_tree | height = " << tree.height() << '\n' << std::flush; + os << "group_tree | order = " << tree.order() << '\n' << std::flush; + os << "group_tree | Blocking group size for leaves = " << 
tree.group_of_leaf_size() << '\n' << std::flush; + os << "group_tree | Blocking group size for cells = " << tree.group_of_cell_size() << '\n' << std::flush; + os << "group_tree | number of leaves group = " << tree.leaf_groups_size() << '\n' << std::flush; auto cell_level_it = tree.cbegin_cells() + (tree.height() - 1); int top_level = tree.box().is_periodic() ? 0 : 2; @@ -125,7 +125,8 @@ namespace scalfmm::io auto group_of_cell_begin = std::cbegin(*(cell_level_it)); auto group_of_cell_end = std::cend(*(cell_level_it)); os << "group_tree | number of cells group (" << level - << ")= " << std::distance(group_of_cell_begin, group_of_cell_end) << '\n'; + << ")= " << std::distance(group_of_cell_begin, group_of_cell_end) << '\n' + << std::flush; } }; @@ -133,9 +134,9 @@ namespace scalfmm::io { auto tree_height = tree.height(); std::size_t id_group{0}; - os << "======================================================================\n"; - os << "========== leaf level : " << tree_height - 1 << " ============================\n"; - os << tree.group_of_leaf_size() << " groups at leaf level.\n"; + os << "======================================================================\n" << std::flush; + os << "========== leaf level : " << tree_height - 1 << " ============================\n" << std::flush; + os << tree.group_of_leaf_size() << " groups at leaf level.\n" << std::flush; std::for_each(tree.cbegin_leaves(), tree.cend_leaves(), // std::cbegin(m_group_of_leaf), std::cend(m_group_of_leaf), @@ -154,8 +155,8 @@ namespace scalfmm::io { os << leaf.index() << "(" << leaf.size() << ") "; }); os << std::endl; }); - os << "======================================================================\n"; - os << "======================================================================\n"; + os << "======================================================================\n" << std::flush; + os << "======================================================================\n" << std::flush; // auto cell_level_it = std::cbegin(m_group_of_cell_per_level) + (tree_height - 1); auto cell_level_it = tree.cbegin_cells() + (tree_height - 1); @@ -179,7 +180,9 @@ namespace scalfmm::io os << ", is_mine: " << std::boolalpha << current_group_symbolics.is_mine << "\n"; os << " group size: " << current_group_symbolics.number_of_component_in_group << ", "; - os << "global index = " << current_group_symbolics.idx_global << " \n"; + os << "global index = " << current_group_symbolics.idx_global << " \n" + << " ref: depend(multi)=" << &ptr_group->ccomponent(0).cmultipoles(0) + << " rf depend(locals)=" << &ptr_group->ccomponent(0).clocals(0) << " \n"; os << " index: "; component::for_each(std::begin(*ptr_group), std::end(*ptr_group), [&os](auto& cell) { os << cell.index() << " "; }); @@ -241,14 +244,14 @@ namespace scalfmm::io auto tree_height = tree.height(); std::size_t id_group{0}; - os << "========================== M2L interaction list ========================= \n"; + os << "========================== M2L interaction list ========================= \n" << std::flush; auto cell_level_it = tree.cbegin_cells() + (tree_height - 1); id_group = 0; int top_level = tree.box().is_periodic() ? 
0 : 2; for(int level = int(tree_height) - 1; level >= top_level; --level) { - os << "========== level : " << level << " ============================\n"; + os << "========== level : " << level << " ============================\n" << std::flush; auto group_of_cell_begin = std::cbegin(*(cell_level_it)); auto group_of_cell_end = std::cend(*(cell_level_it)); std::for_each( @@ -258,27 +261,29 @@ namespace scalfmm::io auto const& current_group_symbolics = ptr_group->csymbolics(); os << "*** Group of cell index " << ++id_group << " *** index in [" << current_group_symbolics.starting_index << ", " << current_group_symbolics.ending_index - << "["; - os << ", is_mine: " << std::boolalpha << current_group_symbolics.is_mine << "\n"; - os << " group size: " << current_group_symbolics.number_of_component_in_group << ", "; + << "[" << std::flush; + os << ", is_mine: " << std::boolalpha << current_group_symbolics.is_mine << "\n" << std::flush; + os << " group size: " << current_group_symbolics.number_of_component_in_group << ", " + << std::flush; os << "global index = " << current_group_symbolics.idx_global << " \n" << " ref: depend(multi)=" << &ptr_group->ccomponent(0).cmultipoles(0) - << " rf depend(locals)=" << &ptr_group->ccomponent(0).clocals(0) << " \n"; - os << " index: \n"; + << " rf depend(locals)=" << &ptr_group->ccomponent(0).clocals(0) << " \n" + << std::flush; + os << " index: \n" << std::flush; int cpt = 0; component::for_each(std::begin(*ptr_group), std::end(*ptr_group), [&cpt, &os](auto& cell) { auto& cell_symbolics = cell.symbolics(); os << " " << cpt++ << " " << cell.index() << " m2l_list (" - << cell_symbolics.existing_neighbors << "): "; + << cell_symbolics.existing_neighbors << "): " << std::flush; // get the m2l interaction list auto index = cell_symbolics.interaction_iterators; for(std::size_t idx = 0; idx < cell_symbolics.existing_neighbors; ++idx) { os << index[idx]->index() << " "; } - os << std::endl; + os << std::endl << std::flush; }); os << std::endl; }); diff --git a/include/scalfmm/tree/utils.hpp b/include/scalfmm/tree/utils.hpp index f91229e48da054bdca4fc48618f3767c5221f1d1..02746330ad1ffce69e34a09b646e4ee6ccacacdf 100644 --- a/include/scalfmm/tree/utils.hpp +++ b/include/scalfmm/tree/utils.hpp @@ -5,6 +5,13 @@ #ifndef SCALFMM_TREE_UTILS_HPP #define SCALFMM_TREE_UTILS_HPP +#include <array> +#include <fstream> +#include <iostream> +#include <iterator> +#include <tuple> +#include <type_traits> + #include "scalfmm/container/point.hpp" #include "scalfmm/meta/traits.hpp" #include "scalfmm/meta/utils.hpp" @@ -13,13 +20,6 @@ #include "scalfmm/utils/massert.hpp" #include "scalfmm/utils/math.hpp" -#include <array> -#include <fstream> -#include <iostream> -#include <iterator> -#include <tuple> -#include <type_traits> - // namespace scalfmm::utils namespace scalfmm::index { diff --git a/include/scalfmm/utils/compare_results.hpp b/include/scalfmm/utils/compare_results.hpp index 5e43bde84aee1051da8b6b57d9176014af6f4a1b..4302a33988d1e2d5e50f6651c697f6775b6520bb 100644 --- a/include/scalfmm/utils/compare_results.hpp +++ b/include/scalfmm/utils/compare_results.hpp @@ -22,16 +22,20 @@ namespace scalfmm namespace utils { /** - * @brief + * @brief compare two arrays + * + * An array contains the position, the inputs and the outputs. 
The number of values (nb_values) + * per particle is array1.size() / nbParticles and the + * index1_to_compare and index2_to_compare is between [0, nb_values[ * * @tparam ArrayType * @param tag * @param dimension * @param nbParticles - * @param index1_to_compare + * @param index1_to_compare. * @param index2_to_compare - * @param array1 - * @param array2 + * @param array1. first array + * @param array2. second array */ template<class ArrayType> auto compare_two_arrays(const std::string& tag, const int dimension, const std::size_t& nbParticles, diff --git a/include/scalfmm/utils/compare_trees.hpp b/include/scalfmm/utils/compare_trees.hpp index 3b4318ac705157bdafc47efc9e2b0172165f8b89..44982ca2045de42cefcc2f7cceec6a67e70291cc 100644 --- a/include/scalfmm/utils/compare_trees.hpp +++ b/include/scalfmm/utils/compare_trees.hpp @@ -1,39 +1,32 @@ -// -------------------------------- -// See LICENCE file at project root -// File : scalfmm/utils/compare_trees.hpp -// -------------------------------- #pragma once +#include <iostream> -#include "scalfmm/utils/io_helpers.hpp" - -#include "xtensor-blas/xblas.hpp" +#include <xtensor-blas/xblas.hpp> -#include <iostream> +#include <scalfmm/utils/io_helpers.hpp> namespace scalfmm::utils { + ///////////////////////////////// + /// /** * @brief compare the cells of two trees * * For all levels, depending on the option, we compare the multipole and local tensors. We calculate the - * frobenius norm of the error between the two tensors. If this norm is smaller than eps, then the test is true. + * Frobenius norm of the error between the two tensors. If this norm is smaller than eps, then the test is true. * * option 1 only the multipoles * option 2 only the locals * option 3 both multipoles and locals * - * @tparam TreeType1 - * @tparam TreeType2 - * @tparam ValueType * @param tree1 first tree * @param tree2 second tree * @param eps the threshold * @param option int (1,2,3) -the option describe above * @return the comparaison */ - template<typename TreeType1, typename TreeType2, typename ValueType> - inline auto compare_two_trees(TreeType1 const& tree1, TreeType2 const& tree2, ValueType const eps, - int option) -> bool + template<typename Tree1, typename Tree2, typename Value_type> + auto inline compare_two_trees(Tree1 const& tree1, Tree2 const& tree2, Value_type const eps, int option) -> bool { bool check{true}, check_mul{true}, check_loc{true}; @@ -117,7 +110,9 @@ namespace scalfmm::utils // locals if(option != 1) { - std::cout << "check locals" << std::endl; + std::cout << "check locals " + << " level " << level << " Cell morton " << cell1.csymbolics().morton_index; + // << " error " << error << std::endl; auto const& locals1 = cell1.clocals(); auto const& locals2 = cell2.clocals(); auto number_of_arrays = locals1.size(); @@ -130,21 +125,21 @@ namespace scalfmm::utils diff.reshape({diff.size()}); auto error = xt::linalg::norm(diff); check = (error < eps); - // std::cout << "diff\n" << diff << std::endl; + std::cerr << " is good? 
" << std::boolalpha << check << std::endl; if(not check) { - std::cerr << "level " << level << " Cell morton " << cell1.csymbolics().morton_index - << " error " << error << std::endl; - std::cerr << "local1(" << l << ")\n" << local1 << std::endl; - std::cerr << "local2(" << l << ")\n" << local2 << std::endl; + std::cerr << " error " << error << std::endl; + std::cerr << " local1(" << l << ")\n" << local1 << std::endl; + std::cerr << " local2(" << l << ")\n" << local2 << std::endl; + std::cerr << " diff(" << l << ")\n" << diff << std::endl; check_loc = false; check_level = false; } } } // end option - } // end cells - } // end groups + } // end cells + } // end groups if(check_level) { std::cout << "level: " << level << " is good !\n"; diff --git a/include/scalfmm/utils/math.hpp b/include/scalfmm/utils/math.hpp index 0199da1f6150349d2f0cf74b857b26a4a0132ae2..6d5f6b089299c74836637710463890d19e3ba64d 100644 --- a/include/scalfmm/utils/math.hpp +++ b/include/scalfmm/utils/math.hpp @@ -13,11 +13,13 @@ namespace scalfmm::math { /** - * @brief + * @brief compute the factoriel of value * * @tparam T + * * @param value - * @return T + * + * @return T value! */ template<typename T> inline auto factorial(int value) -> T @@ -34,15 +36,20 @@ namespace scalfmm::math } return T(result); } - /** - * @brief + * @brief Compute a^p when p is an integer (meta function) * * @tparam T * @param a * @param p * @return T */ + template<typename T> + inline constexpr auto pow(const T a, const std::size_t p) -> T + { + return p == 0 ? 1 : a * pow<T>(a, p - 1); + } + template<typename T> inline auto pow(T a, int p) -> T { @@ -55,34 +62,14 @@ namespace scalfmm::math } /** - * @brief - * - * @tparam T - * @param a - * @param p - * @return T - */ - template<typename T> - inline constexpr auto pow(T a, std::size_t p) -> T - { - return p == 0 ? 
1 : a * pow<T>(a, p - 1); - } - - /** - * @brief + * @brief Check if |a b| < epsilon for two floating_point * * @tparam T * @tparam U - * @tparam typename - * @tparam T> - * @tparam T>, - * typename - * @tparam U> * @param a * @param b - * @param epsilon - * @return true - * @return false + * @param epsilon the threshold + * @return */ template<typename T, typename U, typename = std::enable_if_t<std::is_floating_point<T>::value, T>, typename = std::enable_if_t<std::is_floating_point<U>::value, U>> @@ -92,15 +79,15 @@ namespace scalfmm::math } /** - * @brief + * @brief return true if value in [range_begin, range_end[ * - * @tparam ValueType1 - * @tparam ValueType + * @tparam ValueType1 for value + * @tparam ValueType for the interval * @param value * @param range_begin * @param range_end - * @return true - * @return false + * @return return true if value in [range_begin, range_end[ + * */ template<typename ValueType1, typename ValueType> inline constexpr auto between(ValueType1 value, ValueType range_begin, ValueType range_end) -> bool diff --git a/include/scalfmm/utils/parameters.hpp b/include/scalfmm/utils/parameters.hpp index 68c48dda92ae62ea4baf1675649b3a25173f329c..c5077e491db0b9453d0bbf5dd0cfc1adaf7a6848 100644 --- a/include/scalfmm/utils/parameters.hpp +++ b/include/scalfmm/utils/parameters.hpp @@ -65,6 +65,7 @@ namespace args cpp_tools::cl_parser::str_vec flags = {"--output-file", "-fout"}; std::string description = "Output particle file (with extension .fma (ascii) or bfma (binary)."; using type = std::string; + type def = "output.fma"; }; /** diff --git a/include/scalfmm/utils/sort.hpp b/include/scalfmm/utils/sort.hpp index d563a0bfecb05aed6a240f07f13f57c8e5a2a5dc..0d9fba59206fb755939a136d8e8e9ef6643898f6 100644 --- a/include/scalfmm/utils/sort.hpp +++ b/include/scalfmm/utils/sort.hpp @@ -264,7 +264,7 @@ namespace scalfmm::utils std::copy(array.begin(), array.end(), tmp_array); constexpr static const std::size_t dimension = points_type::dimension; // - const std::size_t max_level = 2; //sizeof(morton_type) * 8 / dimension - 1; + const std::size_t max_level = sizeof(morton_type) * 8 / dimension - 1; using pair_type = std::pair<morton_type, int>; std::vector<pair_type> tosort(nbParticles); #pragma omp parallel for shared(tosort, nbParticles, box, max_level, array) @@ -275,7 +275,7 @@ namespace scalfmm::utils tosort[i].second = i; } - std::sort(tosort.begin(), tosort.end(), [&](pair_type& a, pair_type& b) { return (a.first > b.first); }); + std::sort(tosort.begin(), tosort.end(), [&](pair_type& a, pair_type& b) { return (a.first < b.first); }); // // We fill the sorted array diff --git a/modules/internal/cpp_tools b/modules/internal/cpp_tools index 8358a544112a2ec6ee72d0a72a37de672b01a310..c67bb86b393d31dff8758fd20a6d87f9f6b8120d 160000 --- a/modules/internal/cpp_tools +++ b/modules/internal/cpp_tools @@ -1 +1 @@ -Subproject commit 8358a544112a2ec6ee72d0a72a37de672b01a310 +Subproject commit c67bb86b393d31dff8758fd20a6d87f9f6b8120d diff --git a/tools/compare_files.cpp b/tools/compare_files.cpp index 34f2caf1480247bd172b8dd6874bff73cf11b43b..dd46fea031add09a7270ca58d533c965af545e2a 100644 --- a/tools/compare_files.cpp +++ b/tools/compare_files.cpp @@ -5,14 +5,18 @@ * Author: Olivier Coulaud */ -#include <cstdlib> #include <fstream> #include <iostream> #include <string> +#include <vector> // +#include "scalfmm/meta/traits.hpp" #include "scalfmm/tools/fma_loader.hpp" #include "scalfmm/tree/box.hpp" #include "scalfmm/utils/compare_results.hpp" +#include "scalfmm/utils/sort.hpp" + 
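+// Note on scalfmm/utils/sort.hpp included above: the Morton-based sort now derives its depth
+// from the Morton type, max_level = sizeof(morton_type) * 8 / dimension - 1, i.e. (assuming a
+// 64-bit morton_type such as std::size_t) 63 levels in 1D, 31 in 2D and 20 in 3D, and the
+// particles are sorted by increasing Morton index.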
+#include "scalfmm/utils/parameters.hpp" #include <cpp_tools/cl_parser/cl_parser.hpp> #include <cpp_tools/colors/colorized.hpp> @@ -64,9 +68,7 @@ //! \endcode using value_type = double; -constexpr int dimension = 3; -using position_type = scalfmm::container::point<double, dimension>; -using box_type = scalfmm::component::box<position_type>; + /// ///////////////////////////////////////////////////////////// /// Local parameters @@ -165,40 +167,17 @@ namespace local_args }; }; } // namespace local_args -///////////////////////////////////////////////////////////// - -auto main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) -> int +///////////////////////////////////////////////////////////** +template<int Dimension> //, typename VectorType> +auto cmp(std::string const& file1, std::string const& file2, bool sort_particle, std::vector<int> const& index1, + std::vector<int> const& index2) -> int { - /// Parsing options - /// - auto parser = cpp_tools::cl_parser::make_parser(cpp_tools::cl_parser::help{}, local_args::input_file_one(), - local_args::input_file_two(), local_args::sort_particle(), - local_args::index_to_compare(), local_args::index2_to_compare()); - - // Parameter handling - parser.parse(argc, argv); - - std::cout << cpp_tools::colors::blue << "Entering sort_particles...\n" << cpp_tools::colors::reset; - - const auto filename1{parser.get<local_args::input_file_one>()}; - if(!filename1.empty()) - { - std::cout << cpp_tools::colors::blue << "<params> Input file 1: " << filename1 << cpp_tools::colors::reset - << '\n'; - } - - const auto filename2{parser.get<local_args::input_file_two>()}; - if(!filename2.empty()) - { - std::cout << cpp_tools::colors::blue << "<params> Input file 2: " << filename2 << cpp_tools::colors::reset - << '\n'; - } - std::vector<int> index; - index = parser.get<local_args::index_to_compare>(); + using position_type = scalfmm::container::point<value_type, Dimension>; + using box_type = scalfmm::component::box<position_type>; bool verbose = true; - scalfmm::io::FFmaGenericLoader<value_type, dimension> loader1(filename1, verbose); - scalfmm::io::FFmaGenericLoader<value_type, dimension> loader2(filename2, verbose); + scalfmm::io::FFmaGenericLoader<value_type, Dimension> loader1(file1, verbose); + scalfmm::io::FFmaGenericLoader<value_type, Dimension> loader2(file2, verbose); // // Allocation @@ -224,37 +203,90 @@ auto main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) -> int loader1.fillParticles(particles1, nb_particles); loader2.fillParticles(particles2, nb_particles); - if(parser.exists<local_args::sort_particle>()) + if(sort_particle) { // define a box, used in the sort - + using morton_type = std::size_t; + const std::size_t max_level = (sizeof(morton_type) * 8 / Dimension) - 1; + // box_type box(loader1.getBoxWidth(), loader1.getCenterOfBox()); std::cout << "Sort needed !! 
" << std::endl; scalfmm::utils::sort_raw_array_with_morton_index(box, nb_particles, particles1); + scalfmm::utils::sort_raw_array_with_morton_index(box, nb_particles, particles2); } + // scalfmm::meta::td<decltype(index1)> u; + scalfmm::utils::compare_two_arrays("TAG ", Dimension, nb_particles, index1, index2, particles1, particles2); + return 0; +} +// +////////////////////////////////////////////////////////////////////////////// +// +auto main([[maybe_unused]] int argc, [[maybe_unused]] char* argv[]) -> int +{ + /// Parsing options + /// + auto parser = cpp_tools::cl_parser::make_parser( + cpp_tools::cl_parser::help{}, args::Dimension{}, local_args::input_file_one(), local_args::input_file_two(), + local_args::sort_particle(), local_args::index_to_compare(), local_args::index2_to_compare()); + + // Parameter handling + parser.parse(argc, argv); + + std::cout << cpp_tools::colors::blue << "Entering sort_particles...\n" << cpp_tools::colors::reset; + const int dimension{parser.get<args::Dimension>()}; + const auto filename1{parser.get<local_args::input_file_one>()}; + if(!filename1.empty()) + { + std::cout << cpp_tools::colors::blue << "<params> Input file 1: " << filename1 << cpp_tools::colors::reset + << '\n'; + } - if(parser.exists<local_args::index2_to_compare>()) + const auto filename2{parser.get<local_args::input_file_two>()}; + if(!filename2.empty()) + { + std::cout << cpp_tools::colors::blue << "<params> Input file 2: " << filename2 << cpp_tools::colors::reset + << '\n'; + } + std::vector<int> index; + index = parser.get<local_args::index_to_compare>(); + + // scalfmm::meta::td<decltype(ind ex)> u1; + + bool sort_particle{parser.exists<local_args::sort_particle>()}; + bool index2_exist{parser.exists<local_args::index2_to_compare>()}; + std::vector<int> index2; + + if(index2_exist) { - std::vector<int> index2; index2 = parser.get<local_args::index2_to_compare>(); if(index2.size() != index.size()) { std::cerr << "Wrong number of index between file1 and file2" << std::endl; std::exit(EXIT_FAILURE); } - scalfmm::utils::compare_two_arrays("TAG ", dimension, nb_particles, index, index2, particles1, particles2); } else { - scalfmm::utils::compare_two_arrays("TAG ", dimension, nb_particles, index, index, particles1, particles2); - - // + index2 = index; } - // auto error = compareTwoArrays<FReal, FmaRWParticle<FReal,8,8>* >("TAG", nbParticles, particles1, particles2); - // - // return int(error); - return 0; + if(dimension == 1) + { + return cmp<1>(filename1, filename2, sort_particle, index, index2); + } + else if(dimension == 2) + { + return cmp<2>(filename1, filename2, sort_particle, index, index2); + } + else if(dimension == 3) + { + return cmp<3>(filename1, filename2, sort_particle, index, index2); + } + else + { + throw std::invalid_argument("The dimension is wrong (1,2 or 3)"); + return 1; + } } diff --git a/units/fmm/count_kernel_mpi.cpp b/units/fmm/count_kernel_mpi.cpp index b3f23674fda14dca9677ae7dd3e093a071b61f76..2b561ffcb49ef9000018ca8039e1fbacc5b42be8 100644 --- a/units/fmm/count_kernel_mpi.cpp +++ b/units/fmm/count_kernel_mpi.cpp @@ -2,4 +2,46 @@ // // Units for test fmm // ---------------------- + +#include <cpp_tools/parallel_manager/parallel_manager.hpp> + +static cpp_tools::parallel_manager::parallel_manager para; + #include "units_count_kernel_mpi_gen.hpp" + +//auto run(const int& tree_height, const int& group_size, std::string& input_file, bool use_leaf_distribution, bool mutual = false) -> int{ + +TEMPLATE_TEST_CASE("test count 2d", "[test-count-2d]", double) +{ + // 
leaf distrubution and not mutual + SECTION("count 2d", "[count2d]") + { + run_count_kernel_mpi<2, double>(4, 10, path + "test_2d_ref.fma", true, false); + } // h = 5 +} + +TEMPLATE_TEST_CASE("test count 3d", "[test-count-3d]", double) +{ + // leaf distribution and mutual + SECTION("count 3d", "[count3d]") + { + run_count_kernel_mpi<3, double>(5, 40, path + "sphere-706_source.fma", true, true); + } // h = 5 +} + +TEMPLATE_TEST_CASE("test count 3d", "[test-count-3d]", float) +{ + // leaf distribution and mutual + SECTION("count 3d", "[count3d]") + { + run_count_kernel_mpi<3, float>(5, 40, path + "sphere-706_source.fma", true, true); + } // h = 5 +} +int main(int argc, char* argv[]) +{ + para.init(); + int result = Catch::Session().run(argc, argv); + para.end(); + + return result; +} diff --git a/units/fmm/units_count_kernel_mpi_gen.hpp.in b/units/fmm/units_count_kernel_mpi_gen.hpp.in index 5928e79710206f7a783c3d211691ff03845b63aa..53db7b703db3c2f753c0645b105b713e7ef28b42 100644 --- a/units/fmm/units_count_kernel_mpi_gen.hpp.in +++ b/units/fmm/units_count_kernel_mpi_gen.hpp.in @@ -20,7 +20,7 @@ #include "scalfmm/utils/generate.hpp" #include "scalfmm/utils/math.hpp" #include <cpp_tools/colors/colorized.hpp> -#include <cpp_tools/parallel_manager/parallel_manager.hpp> +// #include <cpp_tools/parallel_manager/parallel_manager.hpp> #define CATCH_CONFIG_RUNNER #include <catch2/catch.hpp> @@ -49,30 +49,28 @@ inline auto constexpr get_accumulate_shape() } template<int Dimension, typename value_type> -//auto run(cpp_tools::parallel_manager::parallel_manager& para, const int& tree_height, const int& group_size, -// Array const& pbc, const int nb_level_above_root, const bool readFile, std::string& input_file, -// const bool interaction, bool use_leaf_distribution, bool use_particle_distribution) -> int -auto run(const int& tree_height, const int& group_size, std::string const && input_file, bool use_leaf_distribution, bool mutual = false) -> int{ - - static constexpr std::size_t number_of_physical_values = 1; - // static constexpr std::size_t dimpow2 = scalfmm::math::pow(2, Dimension); - const auto runtime_order = 1; - bool use_particle_distribution{false}; - int level_shared{2}; - if(!use_leaf_distribution) - { - use_particle_distribution = true ; - } +auto run_count_kernel_mpi(const int& tree_height, const int& group_size, std::string const&& input_file, + bool use_leaf_distribution, bool mutual = false) -> int +{ + static constexpr std::size_t number_of_physical_values = 1; + const auto runtime_order = 1; + bool use_particle_distribution{false}; + int level_shared{2}; + if(!use_leaf_distribution) + { + use_particle_distribution = true; + } // - cpp_tools::parallel_manager::parallel_manager para; - para.init(); - + // cpp_tools::parallel_manager::parallel_manager para; + // para.init(); + const int rank = para.get_process_id(); const int nproc = para.get_num_processes(); // Parameter handling // ------------------------------------------------------------------------------ - using Particle_type = scalfmm::container::particle<value_type, Dimension, value_type, number_of_physical_values, value_type, 1>; + using Particle_type = + scalfmm::container::particle<value_type, Dimension, value_type, number_of_physical_values, value_type, 1>; using container_type = scalfmm::container::particle_container<Particle_type>; using position_type = typename Particle_type::position_type; using cell_type = @@ -87,16 +85,16 @@ auto run(const int& tree_height, const int& group_size, std::string const && in point_type 
box_center(0.0); value_type box_width{1.}; // - std::vector<bool> pbc(Dimension, false); + std::vector<bool> pbc(Dimension, false); int nb_level_above_root{-1}; - + container_type* container; std::vector<Particle_type> particles_set; std::size_t number_of_particles{}; std::size_t local_number_of_particles{}; { - bool verbose = true; //false; + bool verbose = true; //false; scalfmm::io::DistFmaGenericLoader<value_type, Dimension> loader(input_file, para, verbose); @@ -127,51 +125,7 @@ auto run(const int& tree_height, const int& group_size, std::string const && in particles_set[idx] = p; } } - /* - else - { - // generate particles: one par leaf, the octree is full. - number_of_particles = std::pow(dimpow2, (tree_height - 1)); - std::cout << "number_of_particles = " << number_of_particles << " box_width " << box_width << '\n'; - - auto number_of_values_per_dimension = std::size_t(scalfmm::math::pow(2, (tree_height - 1))); - const std::size_t bloc = number_of_particles / nproc; - - local_number_of_particles = (rank < nproc - 1) ? bloc : number_of_particles - (nproc - 1) * bloc; - particles_set.resize(local_number_of_particles); - // - const std::size_t start_index{rank * bloc}; - const std::size_t end_index{start_index + local_number_of_particles}; - std::cout << "start_index = " << start_index << " end_index = " << end_index << '\n'; - - value_type step{box_width / std::pow(2, (tree_height))}; - - std::cout << "Number of value per dimension = " << number_of_values_per_dimension << '\n'; - std::cout << "Step = " << step << '\n'; - - for(std::size_t index{start_index}, idx{0}; index < end_index; ++index, ++idx) - { - auto coord = scalfmm::index::get_coordinate_from_morton_index<Dimension>(index); - - point_type pos{coord}; - std::cout << idx << "index " << index << " coord " << coord << " centre: " << step * pos << std::endl; - particle_type p; - std::size_t ii{0}; - for(auto& e: p.position()) - { - e = -box_width * 0.5 + step * 0.5 + step * pos[ii++]; - } - particles_set[idx] = p; - } - } - */ - // std::cout << "pbc: " << std::boolalpha; - // for(auto e: pbc) - // { - // std::cout << e << " "; - // } - // std::cout << std::endl; box_type box(box_width, box_center); #ifdef scalfmm_BUILD_PBC box.set_periodicity(pbc); @@ -226,13 +180,9 @@ auto run(const int& tree_height, const int& group_size, std::string const && in scalfmm::io::trace(std::cout, letTree, 4); } - - //auto operator_to_proceed = scalfmm::algorithms::operators_to_proceed::farfield; - // auto operator_to_proceed = scalfmm::algorithms::operators_to_proceed::nearfield; + // auto operator_to_proceed = scalfmm::algorithms::operators_to_proceed::all; - // auto operator_to_proceed = (scalfmm::algorithms::operators_to_proceed::p2m | scalfmm::algorithms::operators_to_proceed::m2l); - // auto operator_to_proceed = (scalfmm::algorithms::operators_to_proceed::p2m | scalfmm::algorithms::operators_to_proceed::m2m | scalfmm::algorithms::operators_to_proceed::m2l) ; - + // scalfmm::algorithms::mpi::proc_task(letTree, fmm_operator, operator_to_proceed); // std::size_t nb_particles_min = 20 * number_of_particles, nb_particles_max = 0, nb_per = 1; @@ -273,45 +223,19 @@ auto run(const int& tree_height, const int& group_size, std::string const && in std::cout << "wrong number of particles - nb particles (min) " << nb_particles_min << " (max) " << nb_particles_max << " (expected) " << number_of_particles << std::endl; - if(para.io_master()) - std::cout << "Save Tree in parallel\n"; - // std::string outName("saveTree_" + std::to_string(rank) + ".bin"); - 
std::string outName("saveTreeLet.bin"); - std::string header("CHEBYSHEV LOW RANK "); - scalfmm::tools::io::save(para, outName, letTree, header); + // if(para.io_master()) + // std::cout << "Save Tree in parallel\n"; + // // std::string outName("saveTree_" + std::to_string(rank) + ".bin"); + // std::string outName("saveTreeLet.bin"); + // std::string header("CHEBYSHEV LOW RANK "); + // scalfmm::tools::io::save(para, outName, letTree, header); } std::cout << cpp_tools::colors::reset << '\n'; REQUIRE(right_number_of_particles); // - para.end(); + // para.end(); // return right_number_of_particles; } // - - -//auto run(const int& tree_height, const int& group_size, std::string& input_file, bool use_leaf_distribution, bool mutual = false) -> int{ - - -TEMPLATE_TEST_CASE("test count 2d", "[test-count-2d]", double) -{ - // leaf distrubution and not mutual - SECTION("count 2d", "[count2d]") { run<2,double>(4, 10, path + "test_2d_ref.fma", true, false); } // h = 5 -} - -/* -TEMPLATE_TEST_CASE("test count 3d", "[test-count-3d]", double) -{ - // leaf distrubution and mutual - SECTION("count 3d", "[count3d]") { run<3,double>(5, 40, path +"sphere-706_source.fma", true,true); } // h = 5 -} -*/ -int main(int argc, char* argv[]) -{ - int result = Catch::Session().run(argc, argv); - - return result; -} - - diff --git a/units/fmm/units_count_particles_mpi_gen.hpp b/units/fmm/units_count_particles_mpi_gen.hpp new file mode 100644 index 0000000000000000000000000000000000000000..9b8aecb2679e9ed36bb3beb9b7a1e5de54f34700 --- /dev/null +++ b/units/fmm/units_count_particles_mpi_gen.hpp @@ -0,0 +1,316 @@ +// @FUSE_MPI +#include "scalfmm/algorithms/fmm.hpp" +#include "scalfmm/algorithms/mpi/proc_task.hpp" +// +#include "scalfmm/container/particle.hpp" +#include "scalfmm/container/particle_container.hpp" +#include "scalfmm/operators/count_kernel/count_kernel.hpp" +// +#include "scalfmm/interpolation/grid_storage.hpp" +#include "scalfmm/meta/utils.hpp" +#include "scalfmm/tools/fma_dist_loader.hpp" +#include "scalfmm/tools/tree_io.hpp" +#include "scalfmm/tree/box.hpp" +#include "scalfmm/tree/cell.hpp" +#include "scalfmm/tree/dist_group_tree.hpp" +#include "scalfmm/tree/group_let.hpp" +#include "scalfmm/tree/leaf_view.hpp" +#include "scalfmm/tree/utils.hpp" +#include "scalfmm/utils/generate.hpp" + +#include <cpp_tools/colors/colorized.hpp> +#include <cpp_tools/parallel_manager/parallel_manager.hpp> + +#define CATCH_CONFIG_RUNNER +#include <catch2/catch.hpp> + +template<std::size_t dim> +inline auto constexpr get_accumulate_shape() +{ + if constexpr(dim == 1) + { + return std::array<std::size_t, dim>{1}; + } + if constexpr(dim == 2) + { + return std::array<std::size_t, dim>{1, 1}; + } + if constexpr(dim == 3) + { + return std::array<std::size_t, dim>{1, 1, 1}; + } + if constexpr(dim == 4) + { + return std::array<std::size_t, dim>{1, 1, 1, 1}; + } +} + +template<int Dimension, typename Array> +//auto run(cpp_tools::parallel_manager::parallel_manager& para, const int& tree_height, const int& group_size, +// Array const& pbc, const int nb_level_above_root, const bool readFile, std::string& input_file, +// const bool interaction, bool use_leaf_distribution, bool use_particle_distribution) -> int +auto run(const int& tree_height, const int& group_size, std::string& input_file, bool use_leaf_distribution, bool mutual = false) -> int{ + static constexpr std::size_t number_of_physical_values = 1; + static constexpr std::size_t dimpow2 = pow(2, Dimension); + const auto runtime_order = 1; + + int level_shared{2}; + 
if(use_leaf_distribution) { + use_particle_distribution = false ; + } + else { + use_particle_distribution = true ; + } + // + const int rank = para.get_process_id(); + const int nproc = para.get_num_processes(); + // Parameter handling + if(readFile) + { + if(!input_file.empty()) + { + std::cout << cpp_tools::colors::blue << "<params> Input file : " << input_file << cpp_tools::colors::reset + << '\n'; + } + } + + // ------------------------------------------------------------------------------ + using particle_type = scalfmm::container::particle<double, Dimension, double, number_of_physical_values, double, 1>; + using container_type = scalfmm::container::particle_container<particle_type>; + using position_type = typename particle_type::position_type; + using cell_type = + scalfmm::component::cell<scalfmm::component::grid_storage<double, Dimension, number_of_physical_values, 1>>; + using leaf_type = scalfmm::component::leaf_view<particle_type>; + using box_type = scalfmm::component::box<position_type>; + using group_tree_type = scalfmm::component::dist_group_tree<cell_type, leaf_type, box_type>; + // + // ------------------------------------------------------------------------------ + // + using point_type = scalfmm::container::point<double, Dimension>; + point_type box_center(0.0); + double box_width{1.}; + // + container_type* container; + std::vector<particle_type> particles_set; + + std::size_t number_of_particles{}; + std::size_t local_number_of_particles{}; + if(readFile) // Read particles from a file + { + bool verbose = false; + + scalfmm::io::DistFmaGenericLoader<double, Dimension> loader(input_file, para, verbose); + + number_of_particles = loader.getNumberOfParticles(); + local_number_of_particles = loader.getMyNumberOfParticles(); + number_of_particles = loader.getNumberOfParticles(); + box_width = loader.getBoxWidth(); + box_center = loader.getBoxCenter(); + + auto nb_val_to_red_per_part = loader.get_dimension() + loader.get_number_of_input_per_record(); + double* values_to_read = new double[nb_val_to_red_per_part]{}; + container = new container_type(local_number_of_particles); + particles_set.resize(local_number_of_particles); + for(std::size_t idx = 0; idx < local_number_of_particles; ++idx) + { + loader.fillParticle(values_to_read, nb_val_to_red_per_part); + particle_type p; + std::size_t ii{0}; + for(auto& e: p.position()) + { + e = values_to_read[ii++]; + } + for(auto& e: p.inputs()) + { + e = values_to_read[ii++]; + } + // container->insert_particle(idx, p); + particles_set[idx] = p; + } + } + else + { + // generate particles: one par leaf, the octree is full. + number_of_particles = std::pow(dimpow2, (tree_height - 1)); + std::cout << "number_of_particles = " << number_of_particles << " box_width " << box_width << '\n'; + + auto number_of_values_per_dimension = std::size_t(scalfmm::math::pow(2, (tree_height - 1))); + const std::size_t bloc = number_of_particles / nproc; + + local_number_of_particles = (rank < nproc - 1) ? 
bloc : number_of_particles - (nproc - 1) * bloc; + particles_set.resize(local_number_of_particles); + + // + const std::size_t start_index{rank * bloc}; + const std::size_t end_index{start_index + local_number_of_particles}; + std::cout << "start_index = " << start_index << " end_index = " << end_index << '\n'; + + double step{box_width / std::pow(2, (tree_height))}; + + std::cout << "Number of value per dimension = " << number_of_values_per_dimension << '\n'; + std::cout << "Step = " << step << '\n'; + + for(std::size_t index{start_index}, idx{0}; index < end_index; ++index, ++idx) + { + auto coord = scalfmm::index::get_coordinate_from_morton_index<Dimension>(index); + + point_type pos{coord}; + std::cout << idx << "index " << index << " coord " << coord << " centre: " << step * pos << std::endl; + particle_type p; + std::size_t ii{0}; + for(auto& e: p.position()) + { + e = -box_width * 0.5 + step * 0.5 + step * pos[ii++]; + } + particles_set[idx] = p; + } + } + // std::cout << "pbc: " << std::boolalpha; + // for(auto e: pbc) + // { + // std::cout << e << " "; + // } + // std::cout << std::endl; + box_type box(box_width, box_center); +#ifdef SCALFMM_BUILD_PBC + box.set_periodicity(pbc); +#endif + std::cout << "Box: " << box << std::endl; + /////////////////////////////////////////////////////////////////////////////////////////////////////// + /// Set particles in the tree and construct the let + /// 1) sort the particles according to their Morton index + /// 2) construct the tree, then the let + /// + const int leaf_level = tree_height - 1; + // separation criteria used to construct M2L | P2P ghosts + int separation = 1; + // Construct the LET + auto letTree = scalfmm::tree::let::buildLetTree<group_tree_type>( + para, number_of_particles, particles_set, box, leaf_level, level_shared, group_size, group_size, runtime_order, + separation, use_leaf_distribution, use_particle_distribution); + + // if(para.io_master()) + { + std::cout << cpp_tools::colors::blue << "Print tree distribution\n"; + letTree.print_distrib(std::cout); + std::cout << cpp_tools::colors::reset; + } + +#ifdef SCALFMM_BUILD_PBC + std::cerr << cpp_tools::color::red << "Doesn't work with PBC \n" << cpp_tools::color::reset; + letTree.set_levels_above_root(nb_level_above_root); +#endif + // + /////////////////////////////////// + // using fmm_operator_type = count_kernels::particles::count_fmm_operator<Dimension>; + // // fmm_operator_type fmm_operator{}; + using fmm_operator_type = scalfmm::operators::fmm_operators<count_kernels::particles::count_near_field, + count_kernels::particles::count_far_field<Dimension>>; + bool mutual = false; + int const& separation_criterion = separation; // fmm_operator.near_field().separation_criterion(); + + count_kernels::particles::count_near_field nf(mutual); + count_kernels::particles::count_far_field<Dimension> ff{}; + fmm_operator_type fmm_operator(nf, ff); + std::cout << cpp_tools::colors::red << "build_interaction_lists \n" << cpp_tools::colors::reset << std::flush; + + scalfmm::list::sequential::build_interaction_lists(letTree, letTree, separation_criterion, mutual); + std::cout << cpp_tools::colors::red << "trace \n" << cpp_tools::colors::reset << std::flush; + // if(para.io_master()) + { + std::cout << cpp_tools::colors::red << "trace 2\n" << cpp_tools::colors::reset << std::flush; + + scalfmm::io::trace(std::cout, letTree, 2); + std::cout << cpp_tools::colors::red << "trace 4\n" << cpp_tools::colors::reset << std::flush; + + scalfmm::io::trace(std::cout, letTree, 4); + } + + //auto 
operator_to_proceed = scalfmm::algorithms::operators_to_proceed::farfield; + // auto operator_to_proceed = scalfmm::algorithms::operators_to_proceed::nearfield; + auto operator_to_proceed = scalfmm::algorithms::operators_to_proceed::all; + // auto operator_to_proceed = (scalfmm::algorithms::operators_to_proceed::p2m | scalfmm::algorithms::operators_to_proceed::m2l); + // auto operator_to_proceed = (scalfmm::algorithms::operators_to_proceed::p2m | scalfmm::algorithms::operators_to_proceed::m2m | scalfmm::algorithms::operators_to_proceed::m2l) ; + + scalfmm::algorithms::mpi::proc_task(letTree, fmm_operator, operator_to_proceed); + // + std::size_t nb_particles_min = 20 * number_of_particles, nb_particles_max = 0, nb_per = 1; + bool right_number_of_particles = true; + int nb_part_above = 1; + // Construct the total number of particles + for(int d = 0; d < Dimension; ++d) + { + if(pbc[d]) + { + nb_per *= 3; + nb_part_above *= std::pow(2, nb_level_above_root + 1); + } + } + number_of_particles *= nb_per; + number_of_particles *= nb_part_above; + scalfmm::component::for_each_mine_leaf( + letTree.begin_mine_leaves(), letTree.end_mine_leaves(), + [&right_number_of_particles, number_of_particles, &nb_particles_max, &nb_particles_min](auto const& leaf) + { + size_t nb_part = std::get<0>(*scalfmm::container::outputs_begin(leaf.particles())); + nb_particles_max = std::max(nb_particles_max, nb_part); + nb_particles_min = std::min(nb_particles_min, nb_part); + if(nb_part != number_of_particles) + { + std::cout << cpp_tools::colors::red << "wrong number of particles - index " << leaf.index() + << " nb particles " << nb_part << std::endl; + right_number_of_particles = false; + } + }); + std::cout << cpp_tools::colors::reset << '\n'; + if(right_number_of_particles) + { + std::cout << "Found the right number of particles - nb particles " << number_of_particles << std::endl; + } + else + { + std::cout << "wrong number of particles - nb particles (min) " << nb_particles_min << " (max) " + << nb_particles_max << " (expected) " << number_of_particles << std::endl; + + if(para.io_master()) + std::cout << "Save Tree in parallel\n"; + // std::string outName("saveTree_" + std::to_string(rank) + ".bin"); + std::string outName("saveTreeLet.bin"); + std::string header("CHEBYSHEV LOW RANK "); + scalfmm::tools::io::save(para, outName, letTree, header); + } + std::cout << cpp_tools::colors::reset << '\n'; + REQUIRE(right_number_of_particles); + + return right_number_of_particles; +} + +// + + +//auto run(const int& tree_height, const int& group_size, std::string& input_file, bool use_leaf_distribution, bool mutual = false) -> int{ +/* +TEMPLATE_TEST_CASE("test count 1d", "[test-count-1d]", double) +{ + SECTION("count 1d", "[count1d]") { run<1>(4, 10, "../data/units/test_2d_ref.fma", false); } // h = 5 +} +*/ +TEMPLATE_TEST_CASE("test count 2d", "[test-count-2d]", double) +{ + SECTION("count 2d", "[count2d]") { run<2>(3, 10, "../data/units/test_2d_ref.fma", false); } // h = 5 +} + +TEMPLATE_TEST_CASE("test count 3d", "[test-count-3d]", double) +{ + // leaf distrubution and mutual + SECTION("count 3d", "[count3d]") { run<3>(5, 40, "../data/units/sphere-706_source.fma", true,true); } // h = 5 +} + +int main(int argc, char* argv[]) +{ + int result = Catch::Session().run(argc, argv); + + return result; +} + +
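
The MPI unit tests introduced above (`count_kernel_mpi.cpp` and the generated `units_count_*_mpi_gen` headers) share one harness detail: the `cpp_tools::parallel_manager::parallel_manager` is now a file-scope static that is initialized once in `main()` before the Catch2 session and finalized after it, instead of being created and torn down inside each `run_count_kernel_mpi` call. The sketch below is a minimal distillation of that pattern, not part of the patch; the test body is a placeholder and only relies on `parallel_manager` calls that already appear in these sources (`init()`, `end()`, `get_num_processes()`).

```cpp
// Minimal sketch of the Catch2 + MPI harness pattern used by the MPI unit tests:
// the parallel_manager lives at file scope so every test case can reach it,
// and MPI is initialized exactly once around the Catch2 session.
#include <cpp_tools/parallel_manager/parallel_manager.hpp>

#define CATCH_CONFIG_RUNNER
#include <catch2/catch.hpp>

static cpp_tools::parallel_manager::parallel_manager para;

TEST_CASE("mpi harness smoke test", "[mpi]")
{
    // Placeholder body: every rank sees the same, already-initialized manager.
    REQUIRE(para.get_num_processes() >= 1);
}

int main(int argc, char* argv[])
{
    para.init();   // start MPI before any test case runs
    const int result = Catch::Session().run(argc, argv);
    para.end();    // finalize MPI after all test cases have finished
    return result;
}
```

Launched under `mpirun`, each rank executes the complete Catch2 session, which is what the distributed loader and `buildLetTree`-based tests above assume.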