diff --git a/include/scalfmm/algorithms/mpi/upward.hpp b/include/scalfmm/algorithms/mpi/upward.hpp index 874565ea0219a6844f46d6b17beab9f27af9ced0..53d435ce963d15b9c456ef89f1678cb2f0e1c16e 100644 --- a/include/scalfmm/algorithms/mpi/upward.hpp +++ b/include/scalfmm/algorithms/mpi/upward.hpp @@ -291,268 +291,169 @@ namespace scalfmm::algorithms::mpi::pass // } } } - // std::cout << " END start_communications" << std::endl << std::flush; - // #pragma omp taskwait - // -} -template<typename Tree> -void prepare_comm_up(Tree tree, const int level) -{ - using value_type = typename Tree::base_type::cell_type::value_type; - using dep_type = typename Tree::group_of_cell_type::symbolics_type::ptr_multi_dependency_type; - - static constexpr std::size_t dimension = Tree::base_type::box_type::dimension; - static constexpr int nb_inputs = Tree::cell_type::storage_type::inputs_size; - // - // number of theoretical children - constexpr int nb_children = math::pow(2, dimension); - static constexpr auto prio{omp::priorities::max}; - // - auto& para = tree.get_parallel_manager(); - auto& comm = para.get_communicator(); - auto ptr_comm = &comm; - auto rank = comm.rank(); - int nb_proc = comm.size(); - int tag_nb = 1200 + 10 * level; - int tag_data = 1201 + 10 * level; - auto mpi_int_type = cpp_tools::parallel_manager::mpi::get_datatype<int>(); - // level where the multipoles are known - auto level_child = level + 1; - // iterator on the child cell groups - auto it_first_group_of_child = tree.begin_mine_cells(level_child); - // get size of multipoles - auto it_first_cell_child = it_first_group_of_child->get()->begin(); - auto const& m1 = it_first_cell_child->cmultipoles(); - int size{int(nb_inputs * m1.at(0).size()) * nb_children}; //only nb_children -1 is needed in the worse case - - auto msg = tree.get_up_down_access(level); - - auto const& distrib = tree.get_cell_distribution(level); - auto const& distrib_child = tree.get_cell_distribution(level_child); - bool send_data{false}, receive_data{false}; - - auto ptr_tree = &tree; - - if(rank > 0) + template<typename Tree> + void prepare_comm_up(Tree tree, const int level) { - // If we send the multipoles, they must have been updated by the M2M of the previous level! - // Check if you have to send something on the left (rank-1) - // We are at level l and the children are at level l+1. - // The ghost at the child level(on the right) - // must be updated if the parent index of the first child index is not equal to(lower than) - // the first index at level l. 
- // serialization - // check if we have some child to send (if there exists they starts at the first group) - - // first_index_child = morton index of the first child cell (at level = level_child) - auto first_index_child = distrib_child[rank][0]; - // index_parent = first index of the cell at current level on my process - auto index_parent = distrib[rank][0]; - // parent_morton_index = last index of the cell on previous process at current level - auto previous_parent_morton_index = distrib[rank - 1][1] - 1; - // first_index_child = morton index of the first child cell (at level = level_child) - auto last_index_child_previous = distrib_child[rank - 1][1] - 1; + using value_type = typename Tree::base_type::cell_type::value_type; + using dep_type = typename Tree::group_of_cell_type::symbolics_type::ptr_multi_dependency_type; + + static constexpr std::size_t dimension = Tree::base_type::box_type::dimension; + static constexpr int nb_inputs = Tree::cell_type::storage_type::inputs_size; // - send_data = (first_index_child >> dimension) == previous_parent_morton_index; + // number of theoretical children + constexpr int nb_children = math::pow(2, dimension); + static constexpr auto prio{omp::priorities::max}; + // + auto& para = tree.get_parallel_manager(); + auto& comm = para.get_communicator(); + auto ptr_comm = &comm; + auto rank = comm.rank(); + int nb_proc = comm.size(); + int tag_nb = 1200 + 10 * level; + int tag_data = 1201 + 10 * level; + auto mpi_int_type = cpp_tools::parallel_manager::mpi::get_datatype<int>(); + // level where the multipoles are known + auto level_child = level + 1; + // iterator on the child cell groups + auto it_first_group_of_child = tree.begin_mine_cells(level_child); + // get size of multipoles + auto it_first_cell_child = it_first_group_of_child->get()->begin(); + auto const& m1 = it_first_cell_child->cmultipoles(); + int size{int(nb_inputs * m1.at(0).size()) * nb_children}; //only nb_children -1 is needed in the worse case - if(send_data) - { - std::clog << "upward(task send(" << level << ")) start \n"; - int count{0}; - std::clog << "upward(task send(" << level << ")) index " << first_index_child - << " parent_of_first_index_child " << previous_parent_morton_index << std::endl - << std::flush; - // index is now the parent of the first child - auto index = first_index_child >> dimension; - // I have to send - // find the number of children to send (get pointer on multipoles !!) - // Construct an MPI datatype + auto msg = tree.get_up_down_access(level); + + auto const& distrib = tree.get_cell_distribution(level); + auto const& distrib_child = tree.get_cell_distribution(level_child); + bool send_data{false}, receive_data{false}; + + auto ptr_tree = &tree; + if(rank > 0) + { + // If we send the multipoles, they must have been updated by the M2M of the previous level! + // Check if you have to send something on the left (rank-1) + // We are at level l and the children are at level l+1. + // The ghost at the child level(on the right) + // must be updated if the parent index of the first child index is not equal to(lower than) + // the first index at level l. 
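The ghost-update test described in the comment block above reduces to Morton arithmetic: a cell's parent index is its own index with the last `dimension` bits dropped. A minimal, self-contained sketch of the send-side decision (the variable names mirror those used in prepare_comm_up; the numeric values are invented for illustration):

```cpp
#include <cstdint>
#include <iostream>

int main()
{
    constexpr int dimension = 3;                    // octree: 2^3 children per cell
    std::int64_t first_index_child = 42;            // first child Morton index owned by this rank
    std::int64_t previous_parent_morton_index = 5;  // last parent index owned by rank - 1

    // Dropping the last `dimension` bits of a child index yields the parent index.
    std::int64_t parent = first_index_child >> dimension;  // 42 >> 3 == 5

    // The left neighbour needs our first multipoles only if it owns that parent.
    bool send_data = (parent == previous_parent_morton_index);
    std::cout << "parent = " << parent << ", send_data = " << std::boolalpha << send_data << '\n';
}
```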
// serialization - auto it_group = it_first_group_of_child; - // - auto it_cell = it_first_group_of_child->get()->begin(); - auto next = scalfmm::component::generate_linear_iterator(1, it_group, it_cell); + // check if we have some child to send (if there exists they starts at the first group) + + // first_index_child = morton index of the first child cell (at level = level_child) + auto first_index_child = distrib_child[rank][0]; + // index_parent = first index of the cell at current level on my process + auto index_parent = distrib[rank][0]; + // parent_morton_index = last index of the cell on previous process at current level + auto previous_parent_morton_index = distrib[rank - 1][1] - 1; + // first_index_child = morton index of the first child cell (at level = level_child) + auto last_index_child_previous = distrib_child[rank - 1][1] - 1; // - // compute the number of cells to send and copy the multipoles in the buffer - for(int i = 0; i < nb_children - 1; ++i, next()) + send_data = (first_index_child >> dimension) == previous_parent_morton_index; + + if(send_data) { - if(previous_parent_morton_index < (it_cell->index() >> dimension)) + std::clog << "upward(task send(" << level << ")) start \n"; + int count{0}; + std::clog << "upward(task send(" << level << ")) index " << first_index_child + << " parent_of_first_index_child " << previous_parent_morton_index << std::endl + << std::flush; + // index is now the parent of the first child + auto index = first_index_child >> dimension; + // I have to send + // find the number of children to send (get pointer on multipoles !!) + // Construct an MPI datatype + + // serialization + auto it_group = it_first_group_of_child; + // + auto it_cell = it_first_group_of_child->get()->begin(); + auto next = scalfmm::component::generate_linear_iterator(1, it_group, it_cell); + // + // compute the number of cells to send and copy the multipoles in the buffer + for(int i = 0; i < nb_children - 1; ++i, next()) { - break; + if(previous_parent_morton_index < (it_cell->index() >> dimension)) + { + break; + } + std::clog << "upward(task send) Check children P " << index << " C " << it_cell->index() + << " level " << it_cell->csymbolics().level << std::endl + << std::flush; + ++count; } - std::clog << "upward(task send) Check children P " << index << " C " << it_cell->index() << " level " - << it_cell->csymbolics().level << std::endl - << std::flush; - ++count; - } - std::clog << "upward(task send) nb_send = " << count << std::endl; - ptr_comm->isend(&count, 1, mpi_int_type, rank - 1, tag_nb); - - msg.set_nb_child_to_send(count); - } - } - if(rank < nb_proc - 1) - { - // last_index_child = morton index of the last child cell (at level = level_child) - auto last_index_child = distrib_child[rank][1] - 1; - // parent_morton_index = last index of the cell on previous process at current level - auto parent_morton_index = distrib[rank][1] - 1; - // first_index_child_next = morton index of the first child cell (at level = level_child) on next mpi process - auto first_index_child_next = distrib_child[rank + 1][0]; - receive_data = (last_index_child >> dimension) == (first_index_child_next >> dimension); - if(receive_data) - { - // std::cout << "upward receive comm last_index_child " << last_index_child << " parent " - // << (last_index_child >> dimension) << " next child " << first_index_child_next - // << " its parent " << (first_index_child_next >> dimension) << std::endl - // << std::flush; + std::clog << "upward(task send) nb_send = " << count << std::endl; + 
ptr_comm->isend(&count, 1, mpi_int_type, rank - 1, tag_nb); - std::clog << std::flush; - { - std::clog << "upward(task receive(" << level << ")) start \n"; - int count{0}; - std::vector<value_type> buffer(size); - ptr_comm->recv(&count, 1, mpi_int_type, rank + 1, tag_nb); - std::clog << "upward(task receive(" << level << ")) " << count << " cell(s)\n" << std::flush; - msg.set_nb_child_to_receive(count); + msg.set_nb_child_to_send(count); } } - } -}; -/// @brief This function constructs the local approximation for all the cells of the tree by applying the operator -/// m2m -/// -/// @param tree the tree target -/// @param approximation the approximation to construct the local approximation -/// -template<typename Tree, typename Approximation> -inline auto upward(Tree& tree, Approximation const& approximation) -> void -{ - // If we send the multipoles, they must have been updated by the M2M of the previous level! - // Check if you have to send something on the left (rank-1) - // We are at level l and the children are at level l+1. - // The ghost at the child level(on the right) - // must be updated if the parent index of the first child index is not equal to(lower than) - // the first index at level l. - // serialization - // check if we have some child to send (if there exists they starts at the first group) - - // first_index_child = morton index of the first child cell (at level = level_child) - auto first_index_child = distrib_child[rank][0]; - // index_parent = first index of the cell at current level on my process - auto index_parent = distrib[rank][0]; - // parent_morton_index = last index of the cell on previous process at current level - auto previous_parent_morton_index = distrib[rank - 1][1] - 1; - // first_index_child = morton index of the first child cell (at level = level_child) - auto last_index_child_previous = distrib_child[rank - 1][1] - 1; - // - send_data = (first_index_child >> dimension) == previous_parent_morton_index; - - if(send_data) - { - std::clog << "upward(task send(" << level << ")) start \n"; - int count{0}; - std::clog << "upward(task send(" << level << ")) index " << first_index_child - << " parent_of_first_index_child " << previous_parent_morton_index << std::endl - << std::flush; - // index is now the parent of the first child - auto index = first_index_child >> dimension; - // I have to send - // find the number of children to send (get pointer on multipoles !!) 
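The count exchange in the rewritten branch (an isend of the number of children to rank - 1, matched by a recv from rank + 1 in the receive branch further down, both under the same tag) follows the usual left/right pairing along the rank line. A stripped-down sketch in plain MPI; the project routes these calls through cpp_tools::parallel_manager, so the raw MPI calls below are only illustrative, and the real code posts them conditionally, only when a parent cell straddles the rank boundary:

```cpp
#include <mpi.h>
#include <iostream>

int main(int argc, char** argv)
{
    MPI_Init(&argc, &argv);
    int rank{}, nb_proc{};
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nb_proc);

    const int level = 3;
    const int tag_nb = 1200 + 10 * level;  // same tag construction as in the pass
    int count_to_send = 2;                 // children whose parent lives on rank - 1
    int count_received = 0;
    MPI_Request req = MPI_REQUEST_NULL;

    if(rank > 0)  // send the child count to the left neighbour
    {
        MPI_Isend(&count_to_send, 1, MPI_INT, rank - 1, tag_nb, MPI_COMM_WORLD, &req);
    }
    if(rank < nb_proc - 1)  // receive the matching count from the right neighbour
    {
        MPI_Recv(&count_received, 1, MPI_INT, rank + 1, tag_nb, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        std::cout << "rank " << rank << " expects " << count_received << " ghost child(ren)\n";
    }
    if(req != MPI_REQUEST_NULL)
    {
        MPI_Wait(&req, MPI_STATUS_IGNORE);
    }
    MPI_Finalize();
}
```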
- // Construct an MPI datatype - - // serialization - auto it_group = it_first_group_of_child; - // - auto it_cell = it_first_group_of_child->get()->begin(); - auto next = scalfmm::component::generate_linear_iterator(1, it_group, it_cell); - // - // compute the number of cells to send and copy the multipoles in the buffer - for(int i = 0; i < nb_children - 1; ++i, next()) + if(rank < nb_proc - 1) { - if(previous_parent_morton_index < (it_cell->index() >> dimension)) + // last_index_child = morton index of the last child cell (at level = level_child) + auto last_index_child = distrib_child[rank][1] - 1; + // parent_morton_index = last index of the cell on previous process at current level + auto parent_morton_index = distrib[rank][1] - 1; + // first_index_child_next = morton index of the first child cell (at level = level_child) on next mpi process + auto first_index_child_next = distrib_child[rank + 1][0]; + receive_data = (last_index_child >> dimension) == (first_index_child_next >> dimension); + if(receive_data) { - break; + // std::cout << "upward receive comm last_index_child " << last_index_child << " parent " + // << (last_index_child >> dimension) << " next child " << first_index_child_next + // << " its parent " << (first_index_child_next >> dimension) << std::endl + // << std::flush; + + std::clog << std::flush; + { + std::clog << "upward(task receive(" << level << ")) start \n"; + int count{0}; + std::vector<value_type> buffer(size); + ptr_comm->recv(&count, 1, mpi_int_type, rank + 1, tag_nb); + std::clog << "upward(task receive(" << level << ")) " << count << " cell(s)\n" << std::flush; + msg.set_nb_child_to_receive(count); + } } - std::clog << "upward(task send) Check children P " << index << " C " << it_cell->index() << " level " - << it_cell->csymbolics().level << std::endl - << std::flush; - ++count; } - std::clog << "upward(task send) nb_send = " << count << std::endl; - ptr_comm->isend(&count, 1, mpi_int_type, rank - 1, tag_nb); - - msg.set_nb_child_to_send(count); - } -} -if(rank < nb_proc - 1) -{ - // last_index_child = morton index of the last child cell (at level = level_child) - auto last_index_child = distrib_child[rank][1] - 1; - // parent_morton_index = last index of the cell on previous process at current level - auto parent_morton_index = distrib[rank][1] - 1; - // first_index_child_next = morton index of the first child cell (at level = level_child) on next mpi process - auto first_index_child_next = distrib_child[rank + 1][0]; - receive_data = (last_index_child >> dimension) == (first_index_child_next >> dimension); - if(receive_data) + }; + /// @brief This function constructs the local approximation for all the cells of the tree by applying the operator + /// m2m + /// + /// @param tree the tree target + /// @param approximation the approximation to construct the local approximation + /// + template<typename Tree, typename Approximation> + inline auto upward(Tree& tree, Approximation const& approximation) -> void { - // std::cout << "upward receive comm last_index_child " << last_index_child << " parent " - // << (last_index_child >> dimension) << " next child " << first_index_child_next - // << " its parent " << (first_index_child_next >> dimension) << std::endl - // << std::flush; + auto leaf_level = tree.height() - 1; - std::clog << std::flush; + // upper working level is + const int top_height = tree.box().is_periodic() ? 0 : 2; + // const int start_duplicated_level = tree.start_duplicated_level(); + // + // int top = start_duplicated_level < 0 ? 
top_height : start_duplicated_level - 1; + int top = top_height; + for(int level = leaf_level - 1; level >= top /*top_height*/; --level) // int because top_height could be 0 { - std::clog << "upward(task receive(" << level << ")) start \n"; - int count{0}; - std::vector<value_type> buffer(size); - ptr_comm->recv(&count, 1, mpi_int_type, rank + 1, tag_nb); - std::clog << "upward(task receive(" << level << ")) " << count << " cell(s)\n" << std::flush; - msg.set_nb_child_to_receive(count); + // std::cout << "M2M : " << level + 1 << " -> " << level << std::endl << std::flush; + // + start_communications(level, tree); + // std::cout << " end comm " << std::endl << std::flush; + + omp::pass::upward_level(level, tree, approximation); + // std::cout << " end upward_level " << level << std::endl << std::flush; } - } -} -} -; -/// @brief This function constructs the local approximation for all the cells of the tree by applying the operator -/// m2m -/// -/// @param tree the tree target -/// @param approximation the approximation to construct the local approximation -/// -template<typename Tree, typename Approximation> -inline auto upward(Tree& tree, Approximation const& approximation) -> void -{ - auto leaf_level = tree.height() - 1; - - // upper working level is - const int top_height = tree.box().is_periodic() ? 0 : 2; - // const int start_duplicated_level = tree.start_duplicated_level(); - // - // int top = start_duplicated_level < 0 ? top_height : start_duplicated_level - 1; - int top = top_height; - for(int level = leaf_level - 1; level >= top /*top_height*/; --level) // int because top_height could be 0 - { - // std::cout << "M2M : " << level + 1 << " -> " << level << std::endl << std::flush; + // std::cout << "end upward " << std::endl << std::flush; + // - start_communications(level, tree); - // std::cout << " end comm " << std::endl << std::flush; - omp::pass::upward_level(level, tree, approximation); - // std::cout << " end upward_level " << level << std::endl << std::flush; + // for(int level = start_duplicated_level; level >= top_height; --level) // int because top_height could be 0 + // { + // std::cout << "Level duplicated (seq): " << level << std::endl; + // upward_level(level, tree, approximation); + // } } - // std::cout << "end upward " << std::endl << std::flush; - - // - - // for(int level = start_duplicated_level; level >= top_height; --level) // int because top_height could be 0 - // { - // std::cout << "Level duplicated (seq): " << level << std::endl; - // upward_level(level, tree, approximation); - // } -} } // namespace scalfmm::algorithms::mpi::pass #endif // _OPENMP diff --git a/include/scalfmm/algorithms/omp/transfer.hpp b/include/scalfmm/algorithms/omp/transfer.hpp index bb4b5d7698999671877438ca2d87cd2d60e041b3..c8406be836e043298d55add80872d8a2b685b516 100644 --- a/include/scalfmm/algorithms/omp/transfer.hpp +++ b/include/scalfmm/algorithms/omp/transfer.hpp @@ -69,7 +69,7 @@ namespace scalfmm::algorithms::omp::pass auto group_ptr = cell_target_level_it->get(); // dependence on the first local of the group auto ptr_local_raw = &(group_ptr->ccomponent(0).clocals(0)); - static constexpr auto prio{priorities::m2l}; + auto prio{priorities::m2l}; #ifdef SCALFMM_USE_MPI // Increase th priority on the last group (For L2L in MPI) diff --git a/include/scalfmm/algorithms/omp/upward.hpp b/include/scalfmm/algorithms/omp/upward.hpp index a6e972df66c4bfa4cdd4c35aa3d1c4c5829ab7c6..f5e70fed7ea74761497a7f9438d16d300cfc3f26 100644 --- a/include/scalfmm/algorithms/omp/upward.hpp +++ 
b/include/scalfmm/algorithms/omp/upward.hpp @@ -5,7 +5,7 @@ #ifndef SCALFMM_ALGORITHMS_OMP_UPWARD_HPP #define SCALFMM_ALGORITHMS_OMP_UPWARD_HPP -#ifdef _OPENMP +#include <limits> #include "scalfmm/operators/m2m.hpp" #include "scalfmm/operators/tags.hpp" @@ -13,7 +13,7 @@ #include "scalfmm/utils/massert.hpp" #include "scalfmm/utils/math.hpp" -#include <limits> +#ifdef _OPENMP #include <omp.h> namespace scalfmm::algorithms::omp::pass diff --git a/include/scalfmm/tools/fma_dist_loader.hpp b/include/scalfmm/tools/fma_dist_loader.hpp index 9e8f9d39bd57a55636fc1596313e215a4365cb23..28be4b05dd7f37dbdbd973116e6092044c411f9c 100644 --- a/include/scalfmm/tools/fma_dist_loader.hpp +++ b/include/scalfmm/tools/fma_dist_loader.hpp @@ -161,7 +161,7 @@ namespace scalfmm::io int m_headerSize; ///< size of the header in byte - int _nbDataTowritePerRecord; ///< number of data to write for one particle. + int m_nbDataTowritePerRecord; ///< number of data to write for one particle. std::size_t m_numberOfParticles; ///< number of particle (global) to write in the file. @@ -315,11 +315,11 @@ namespace scalfmm::io * @brief Write all for all particles the position, physical values, potential and forces. * * @tparam TreeType - * @param myOctree the octree + * @param tree the octree * @param nbParticles number of particles. */ template<typename TreeType> - auto writeFromTree(const TreeType& myOctree, std::size_t const& nbParticles) -> void + auto writeFromTree(const TreeType& tree, std::size_t const& nbParticles) -> void { // The header is already written static constexpr int dimension = TreeType::dimension; diff --git a/include/scalfmm/tree/group_let.hpp b/include/scalfmm/tree/group_let.hpp index 597597c1af30456984cc5b2247201f59ee321252..1b7e1fd7b639d955bda9146a606d053531cf2f36 100644 --- a/include/scalfmm/tree/group_let.hpp +++ b/include/scalfmm/tree/group_let.hpp @@ -1,10 +1,5 @@ -// -------------------------------- -// See LICENCE file at project root -// File : scalfmm/tree/group_let.hpp -// -------------------------------- -#ifndef SCALFMM_TREE_LET_HPP +#ifndef SCALFMM_TREE_LET_HPP #define SCALFMM_TREE_LET_HPP - #include <algorithm> #include <array> #include <fstream> @@ -15,6 +10,13 @@ #include <utility> #include <vector> +#include <cpp_tools/colors/colorized.hpp> +#include <cpp_tools/parallel_manager/parallel_manager.hpp> + +#include <scalfmm/tree/utils.hpp> +#include <scalfmm/utils/io_helpers.hpp> // for io::print +#include <scalfmm/utils/math.hpp> + #include "scalfmm/container/particle_container.hpp" #include "scalfmm/lists/sequential.hpp" #include "scalfmm/meta/utils.hpp" @@ -22,11 +24,8 @@ #include "scalfmm/parallel/mpi/utils.hpp" #include "scalfmm/parallel/utils.hpp" #include "scalfmm/tree/for_each.hpp" -#include "scalfmm/tree/utils.hpp" -#include "scalfmm/utils/io_helpers.hpp" -#include "scalfmm/utils/math.hpp" - #ifdef SCALFMM_USE_MPI + #include <inria/algorithm/distributed/distribute.hpp> #include <inria/algorithm/distributed/mpi.hpp> #include <inria/algorithm/distributed/sort.hpp> @@ -34,23 +33,15 @@ #include <mpi.h> #endif -#include <cpp_tools/colors/colorized.hpp> -#include <cpp_tools/parallel_manager/parallel_manager.hpp> - namespace scalfmm::tree { using morton_type = std::int64_t; // typename Tree_type:: - /** - * @brief - * - * @tparam MortonIdxType - */ - template<typename MortonIdxType> + template<typename MortonIdx> struct leaf_info_type { - using morton_type = MortonIdxType; - MortonIdxType morton{}; + using morton_type = MortonIdx; + MortonIdx morton{}; std::size_t number_of_particles{}; 
friend std::ostream& operator<<(std::ostream& os, const leaf_info_type& w) { @@ -62,18 +53,11 @@ namespace scalfmm::tree namespace let { - /** - * @brief - * - * @tparam BoxType - * @tparam VectorLeafInfoType - * @tparam MortonDistributionType - */ - template<typename BoxType, typename VectorLeafInfoType, typename MortonDistributionType> - inline /*std::vector<morton_type>*/ VectorLeafInfoType - get_ghosts_p2p_interaction(cpp_tools::parallel_manager::parallel_manager& para, BoxType const& box, - std::size_t const& level, int const& separation, VectorLeafInfoType const& leaf_info, - MortonDistributionType const& leaves_distrib) + template<typename Box, typename VectorLeafInfo, typename MortonDistribution> + inline /*std::vector<morton_type>*/ VectorLeafInfo + get_ghosts_p2p_interaction(cpp_tools::parallel_manager::parallel_manager& para, Box const& box, + std::size_t const& level, int const& separation, VectorLeafInfo const& leaf_info, + MortonDistribution const& leaves_distrib) { std::vector<morton_type> ghost_to_add; auto const& period = box.get_periodicity(); @@ -83,11 +67,12 @@ namespace scalfmm::tree for(auto const& info: leaf_info) { auto const& morton_index = info.morton; - auto coordinate{index::get_coordinate_from_morton_index<BoxType::dimension>(morton_index)}; + auto coordinate{index::get_coordinate_from_morton_index<Box::dimension>(morton_index)}; auto interaction_neighbors = index::get_neighbors(coordinate, level, period, separation); auto& list = std::get<0>(interaction_neighbors); auto nb = std::get<1>(interaction_neighbors); int it{0}; + //io::print("rank(" + std::to_string(rank) + ") list idx(p2p) : ", list); while(list[it] < my_distrib[0]) { @@ -114,7 +99,7 @@ namespace scalfmm::tree std::sort(ghost_to_add.begin(), ghost_to_add.end()); auto last = std::unique(ghost_to_add.begin(), ghost_to_add.end()); ghost_to_add.erase(last, ghost_to_add.end()); - VectorLeafInfoType ghost_leaf_to_add(ghost_to_add.size()); + VectorLeafInfo ghost_leaf_to_add(ghost_to_add.size()); for(int i = 0; i < ghost_to_add.size(); ++i) { ghost_leaf_to_add[i] = {ghost_to_add[i], 0}; @@ -122,32 +107,25 @@ namespace scalfmm::tree return ghost_leaf_to_add; } - - /** - * @brief get theoretical m2l interaction list outside me - * - * We return the list of indexes of cells involved in P2P interaction that we do - * not have locally. The cells on other processors may not exist. - * - * @tparam BoxType - * @tparam VectorMortonIdxType - * @tparam MortonDistributionType - * @param[in] para the parallel manager - * @param box - * @param level - * @param separation - * @param local_morton_vect - * @param cell_distrib the cells distribution on the processes - * @return the list of indexes on tother processes - */ - template<typename BoxType, typename VectorMortonIdxType, typename MortonDistributionType> - inline VectorMortonIdxType get_ghosts_m2l_interaction(cpp_tools::parallel_manager::parallel_manager& para, - BoxType const& box, const std::size_t& level, - int const& separation, - VectorMortonIdxType const& local_morton_vect, - MortonDistributionType const& cell_distrib) + /// + /// \brief get theoretical m2l interaction list outside me + /// + /// We return the list of indexes of cells involved in P2P interaction that we do + /// not have locally. The cells on other processors may not exist. 
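Both ghost-selection routines (get_ghosts_p2p_interaction above and get_ghosts_m2l_interaction documented here) apply the same filter: keep an interaction index only if it lies outside the local Morton interval and inside the interval of some other rank. A compact sketch of that filter, assuming half-open [begin, end) intervals per rank (the actual code expresses the test with math::between over cell_distrib):

```cpp
#include <algorithm>
#include <array>
#include <cstdint>
#include <iostream>
#include <vector>

using morton_type = std::int64_t;

// Keep the candidate indices that are not local but are owned by another rank.
std::vector<morton_type> ghosts_outside_me(std::vector<morton_type> const& candidates,
                                           std::vector<std::array<morton_type, 2>> const& distrib, int my_rank)
{
    auto const& mine = distrib[my_rank];
    std::vector<morton_type> ghosts;
    for(auto idx: candidates)
    {
        if(idx >= mine[0] && idx < mine[1])  // already local, nothing to fetch
        {
            continue;
        }
        for(std::size_t r = 0; r < distrib.size(); ++r)
        {
            if(int(r) != my_rank && idx >= distrib[r][0] && idx < distrib[r][1])
            {
                ghosts.push_back(idx);  // owned elsewhere: becomes a ghost cell
                break;
            }
        }
    }
    std::sort(ghosts.begin(), ghosts.end());
    ghosts.erase(std::unique(ghosts.begin(), ghosts.end()), ghosts.end());
    return ghosts;
}

int main()
{
    std::vector<std::array<morton_type, 2>> distrib{{0, 8}, {8, 16}, {16, 24}};
    auto ghosts = ghosts_outside_me({3, 7, 9, 15, 23}, distrib, /*my_rank*/ 1);
    for(auto g: ghosts) { std::cout << g << ' '; }  // prints: 3 7 23
    std::cout << '\n';
}
```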
+ /// + /// \param[in] para the parallel manager + /// \param tree the tree used to compute the interaction + /// \param local_morton_idx the local morton index of the cells + /// \param cell_distrib the cells distribution on the processes + /// \return the list of indexes on tother processes + /// + template<typename Box, typename VectorMortonIdx, typename MortonDistribution> + inline VectorMortonIdx + get_ghosts_m2l_interaction(cpp_tools::parallel_manager::parallel_manager& para, Box const& box, + const std::size_t& level, int const& separation, + VectorMortonIdx const& local_morton_vect, MortonDistribution const& cell_distrib) { - VectorMortonIdxType ghost_to_add; + VectorMortonIdx ghost_to_add; auto const& period = box.get_periodicity(); const auto rank = para.get_process_id(); auto const my_distrib = cell_distrib[rank]; @@ -156,36 +134,64 @@ namespace scalfmm::tree for(auto morton_index: local_morton_vect) { // for each index in the vector of cells in local_morton_vect we compute the m2l interactions - auto coordinate{index::get_coordinate_from_morton_index<BoxType::dimension>(morton_index)}; + auto coordinate{index::get_coordinate_from_morton_index<Box::dimension>(morton_index)}; auto interaction_m2l_list = index::get_m2l_list(coordinate, level, period, separation); auto& list = std::get<0>(interaction_m2l_list); auto nb = std::get<2>(interaction_m2l_list); + // + // io::print("rank(" + std::to_string(rank) + ") list idx(m2l) : ", list); + // io::print("rank(" + std::to_string(rank) + ") my_distrib : ", my_distrib); int it{0}; // We check if the cells are in the distribution for(auto it = 0; it < nb; ++it) { + // if(list[it] > my_distrib[0]) + // std::cout << list[it] << " " << std::boolalpha + // << math::between(list[it], my_distrib[0], my_distrib[1]) << std::endl; + if(math::between(list[it], my_distrib[0], my_distrib[1])) { break; } bool check{false}; + // for(int i = 0; i < rank; ++i) for(int i = rank - 1; i >= 0; i--) { auto const& interval = cell_distrib[i]; + // // if(rank == 2) + // { + // std::cout << "parallel::utils::is_inside_distrib_left list[it]: " << interval[0] << " < " + // << list[it] + // << " < " << interval[1] << std::endl; + // } check = math::between(list[it], interval[0], interval[1]); if(check) { break; } } + // std::cout << " " << list[it] << " " << std::boolalpha << check << std::endl; if(check) // parallel::utils::is_inside_distrib_left(list[it], rank, cell_distrib)) { ghost_to_add.push_back(list[it]); } } - + // while(list[it] < my_distrib[0]) + // { + // std::cout << it << " INSIDE left idx " << list[it] << " " << std::boolalpha + // << parallel::utils::is_inside_distrib(list[it], cell_distrib) << std::endl; + // if(parallel::utils::is_inside_distrib_left(list[it], rank, cell_distrib)) + // { + // ghost_to_add.push_back(list[it]); + // } + // ++it; + // if(it > nb) + // { + // break; + // } + // } it = nb - 1; if(not last_proc) // No ghost on the right on last process { @@ -198,22 +204,20 @@ namespace scalfmm::tree --it; } } + // if(rank == 2) + // { + // io::print("rank(" + std::to_string(rank) + ") tmp ghost_to_add(m2l) : ", ghost_to_add); + // } } std::sort(ghost_to_add.begin(), ghost_to_add.end()); auto last = std::unique(ghost_to_add.begin(), ghost_to_add.end()); ghost_to_add.erase(last, ghost_to_add.end()); + // io::print("rank(" + std::to_string(rank) + ") cell_distrib: ", cell_distrib); + // io::print("rank(" + std::to_string(rank) + ") ghost_to_add(m2l): ", ghost_to_add); return ghost_to_add; } - /** - * @brief - * - * @tparam 
VectorLeafInfoType - * @param localLeaves - * @param ghosts - * @return auto - */ template<typename VectorLeafInfoType> auto merge_split_structure(VectorLeafInfoType const& localLeaves, VectorLeafInfoType const& ghosts) { @@ -261,7 +265,6 @@ namespace scalfmm::tree return std::make_tuple(morton, number_of_particles); } - /** * @brief Split the LeafInfo structure in two vectors (Morton, number_of_particles) * @@ -286,15 +289,6 @@ namespace scalfmm::tree } return std::make_tuple(morton, number_of_particles); } - - /** - * @brief - * - * @tparam VectorLeafInfoIteratorType - * @param begin - * @param end - * @return auto - */ template<typename VectorLeafInfoIteratorType> auto split_structure(const VectorLeafInfoIteratorType begin, const VectorLeafInfoIteratorType end) { @@ -323,8 +317,8 @@ namespace scalfmm::tree * @return the vector ot the three blocs */ template<typename VectorBlockType> - auto merge_blocs(VectorBlockType const& bloc1, VectorBlockType const& bloc2, - VectorBlockType const& bloc3) -> VectorBlockType + VectorBlockType merge_blocs(VectorBlockType const& bloc1, VectorBlockType const& bloc2, + VectorBlockType const& bloc3) { // Merge the three block structure auto size = bloc1.size() + bloc2.size() + bloc3.size(); @@ -345,22 +339,21 @@ namespace scalfmm::tree } return all_blocks; } - /** * @brief Construct the M2M ghost for the current level * * The routine check if there is ghosts during the M2M operation. * If yes, we exchange the ghost indexes - * @tparam BoxType - * @tparam VectorMortonIdxType - * @tparam MortonDistributionType + * @tparam Box + * @tparam VectorMortonIdx + * @tparam MortonDistribution * @param para the parallel manager * @param box the simulation box * @param level the current level * @param local_morton_vect * @param cells_distrib teh distribution of cells * @param top if top is true nothing is down - * @return VectorMortonIdxType + * @return VectorMortonIdx */ template<typename Box, typename VectorMortonIdx, typename MortonDistribution> [[nodiscard]] auto build_ghost_m2m_let_at_level(cpp_tools::parallel_manager::parallel_manager& para, Box& box, @@ -411,7 +404,8 @@ namespace scalfmm::tree send[idx] = local_morton_vect[0]; for(int i = 1; i < std::min(nb_children, int(local_morton_vect.size())); ++i) { - auto parent_index = local_morton_vect[i] >> BoxType::dimension; + auto parent_index = local_morton_vect[i] >> Box::dimension; + // std::cout << "index : " << local_morton_vect[i] << " Parent ! " << parent_first << " " << last_parent_previous_proc << std::endl; if(parent_index == last_parent_previous_proc) { ++idx; @@ -456,39 +450,39 @@ namespace scalfmm::tree // std::clog << " end build_ghost_m2m_let_at_level" << std::endl; return ghosts; } - - /** - * @brief construct the local essential tree (LET) at the level. - * - * We start from a given Morton index distribution and we compute all - * interactions needed - * in the algorithm steps. - * At the leaf level it corresponds to the interactions coming from the - * direct pass (P2P operators) - * and in the transfer pass (M2L operator). For the other levels we - * consider only the M2L interactions. 
- * The leaves_distrib and the cells_distrib might be different - * At the end the let has also all the interaction list computed - * - * @tparam BoxType - * @tparam VectorMortonIdxType - * @tparam MortonDistributionType - * @param para - * @param box - * @param[in] level the level to construct the let - * @param local_morton_vect - * @param[in] cells_distrib the morton index distribution for - * the cells at the leaf level. - * @param separation - * @return VectorMortonIdxType - */ - template<typename BoxType, typename VectorMortonIdxType, typename MortonDistributionType> - [[nodiscard]] auto build_let_at_level(cpp_tools::parallel_manager::parallel_manager& para, BoxType& box, - const int& level, const VectorMortonIdxType& local_morton_vect, - const MortonDistributionType& cells_distrib, - const int& separation) -> VectorMortonIdxType + /// + /// \brief construct the local essential tree (LET) at the level. + /// + /// We start from a given Morton index distribution and we compute all + /// interactions needed + /// in the algorithm steps. + /// At the leaf level it corresponds to the interactions coming from the + /// direct pass (P2P operators) + /// and in the transfer pass (M2L operator). For the other levels we + /// consider only the M2L interactions. + /// The leaves_distrib and the cells_distrib might be different + /// At the end the let has also all the interaction list computed + /// + /// \param[inout] tree the tree to compute the let. + /// \param[in] local_morton_idx the morton index of the particles in the + /// processors. + /// + /// + /// \param[in] cells_distrib the morton index distribution for + /// the cells at the leaf level. + /// + /// \param[in] level the level to construct the let + /// + template<typename Box, typename VectorMortonIdx, typename MortonDistribution> + [[nodiscard]] auto build_let_at_level(cpp_tools::parallel_manager::parallel_manager& para, Box& box, + const int& level, const VectorMortonIdx& local_morton_vect, + const MortonDistribution& cells_distrib, + const int& separation) -> VectorMortonIdx { const auto my_rank = para.get_process_id(); + // std::cout << cpp_tools::colors::red << " --> Begin let::build_let_at_level() at level = " << level + // << "dist: " << cells_distrib[my_rank] << cpp_tools::colors::reset << std::endl; + // io::print("rank(" + std::to_string(my_rank) + ") local_morton_vect : ", local_morton_vect); // we compute the cells needed in the M2L operator @@ -500,7 +494,13 @@ namespace scalfmm::tree std::cout << std::flush; /// Look if the morton index really exists in the distributed tree parallel::utils::check_if_morton_index_exist(para, needed_idx, cells_distrib, local_morton_vect); + /// + // io::print("rank(" + std::to_string(my_rank) + ") check_if_morton_index_exist(m2l) : ", needed_idx); + // + // std::cout << cpp_tools::colors::red + // << "rank(" + std::to_string(my_rank) + ")-- > End let::build_let_at_level() " + // << cpp_tools::colors::reset << std::endl; return needed_idx; } // template<typename OctreeTree, typename VectorMortonIdx, typename MortonDistribution> @@ -530,13 +530,12 @@ namespace scalfmm::tree // std::cout << cpp_tools::colors::green << " --> End let::build_let_at_level() at level = " << level // << cpp_tools::colors::reset << std::endl; // } - /** * @brief * - * @tparam BoxType - * @tparam VectorMortonIdxType - * @tparam MortonDistributionType + * @tparam Box + * @tparam VectorMortonIdx + * @tparam MortonDistribution * @param para * @param box * @param level @@ -544,11 +543,11 @@ namespace scalfmm::tree 
* @param leaves_distrib * @param separation */ - template<typename BoxType, typename VectorLeafInfo, typename MortonDistributionType> - [[nodiscard]] auto build_let_leaves(cpp_tools::parallel_manager::parallel_manager& para, BoxType const& box, + template<typename Box, typename VectorLeafInfo, typename MortonDistribution> + [[nodiscard]] auto build_let_leaves(cpp_tools::parallel_manager::parallel_manager& para, Box const& box, const std::size_t& level, const VectorLeafInfo& leaf_info /*local_morton_vect*/, - MortonDistributionType const& leaves_distrib, const int& separation) + MortonDistribution const& leaves_distrib, const int& separation) -> VectorLeafInfo { @@ -599,45 +598,46 @@ namespace scalfmm::tree return leaf_info_to_add; } - /** - * @brief buildLetTree Build the let of the tree and the leaves and cells distributions - * - * The algorithm has 5 steps: - * 1) We sort the particles according to their Morton Index (leaf level) - * 2) Build the leaf morton vector of my local particles and construct either - * the leaves distribution or the cell distribution according to parameter - * use_leaf_distribution or use_particle_distribution - * 3) Fit the particles inside the use_leaf_distribution - * 4) Construct the tree according to my particles and build the leaf - * morton vector of my local particles - * 5) Constructing the let level by level - * - * @tparam TreeType - * @tparam VectorType - * @tparam BoxType - * @param[in] manager the parallel manager - * @param[in] number_of_particles total number of particles in the simulation - * @param[in] particle_container vector of particles on my node. On output the array is sorted and correspond to teh distribution built - * @param[in] box size of the simulation box - * @param[in] leaf_level level of the leaf in the tree - * @param[in] level_shared the level at which cells are duplicated on processors. If the level is negative, - * nothing is duplicated. 
-         * @param[in] groupSizeLeaves blocking parameter for the leaves (particles)
-         * @param[in] groupSizeCells blocking parameter for the cells
-         * @param[in] order order of the approximation to build the tree
-         * @param[in] use_leaf_distribution to say if you consider the leaf distribution
-         * @param[in] use_particle_distribution to say if you consider the particle distribution
-         * @return localGroupTree the LET of the octree processors
-         */
-        template<typename TreeType, typename VectorType, typename BoxType>
-        auto buildLetTree(cpp_tools::parallel_manager::parallel_manager& manager,
-                          const std::size_t& number_of_particles, VectorType& particle_container, const BoxType& box,
-                          const int& leaf_level, const int& level_shared, const int groupSizeLeaves,
-                          const int groupSizeCells, const int order, const int separation,
-                          const bool use_leaf_distribution, const bool use_particle_distribution) -> TreeType
+        ///
+        /// \brief buildLetTree Build the LET of the tree and the leaf and cell distributions
+        ///
+        /// The algorithm has 5 steps:
+        /// 1) We sort the particles according to their Morton Index (leaf level)
+        /// 2) Build the leaf morton vector of my local particles and construct either
+        /// the leaves distribution or the cell distribution according to the parameter
+        /// use_leaf_distribution or use_particle_distribution
+        /// 3) Fit the particles inside the use_leaf_distribution
+        /// 4) Construct the tree according to my particles and build the leaf
+        /// morton vector of my local particles
+        /// 5) Construct the LET level by level
+        ///
+        /// \param[in] manager the parallel manager
+        /// \param[in] number_of_particles total number of particles in the simulation
+        /// \param[in] particle_container vector of particles on my node. On output the
+        /// array is sorted and corresponds to the distribution built
+        /// \param[in] box size of the simulation box
+        /// \param[in] leaf_level level of the leaf in the tree
+        /// \param[in] level_shared the level at which cells are duplicated on processors. If the level is negative,
+        /// nothing is duplicated.
+ /// \param[in] groupSizeLeaves blocking parameter for the leaves (particles) + /// \param[in] groupSizeCells blocking parameter for the cells + /// @param[in] order order of the approximation to build the tree + /// @param[in] use_leaf_distribution to say if you consider the leaf distribution + /// @param[in] use_particle_distribution to say if you consider the particle distribution + /// @return localGroupTree the LET of the octree + + /// processors + template<typename Tree_type, typename Vector_type, typename Box_type> + Tree_type + buildLetTree(cpp_tools::parallel_manager::parallel_manager& manager, const std::size_t& number_of_particles, + Vector_type& particle_container, const Box_type& box, const int& leaf_level, + const int& level_shared, const int groupSizeLeaves, const int groupSizeCells, const int order, + const int separation, const bool use_leaf_distribution, const bool use_particle_distribution) { + // std::cout << cpp_tools::colors::green << " --> Begin let::group_let() " << cpp_tools::colors::reset + // << std::endl; // - static constexpr std::size_t dimension = VectorType::value_type::dimension; + static constexpr std::size_t dimension = Vector_type::value_type::dimension; const auto rank = manager.get_process_id(); //////////////////////////////////////////////////////////////////////////// /// Sort the particles at the leaf level according to their Morton index @@ -684,7 +684,7 @@ namespace scalfmm::tree /// /// A morton index should be own by only one process /// - using morton_distrib_type = typename TreeType::data_distrib_type; + using morton_distrib_type = typename Tree_type::data_distrib_type; /// /// Build a uniform distribution of the leaves/cells @@ -752,8 +752,8 @@ namespace scalfmm::tree /// /// Construct the local tree based on our set of particles // Build and empty tree - TreeType localGroupTree(manager, static_cast<std::size_t>(leaf_level + 1), level_shared, order, - groupSizeLeaves, groupSizeCells, box); + Tree_type localGroupTree(manager, static_cast<std::size_t>(leaf_level + 1), level_shared, order, + groupSizeLeaves, groupSizeCells, box); /// Set true because the particles are already sorted /// In fact we have all the leaves to add in leafMortonIdx - could be used to construct /// the tree !!! 
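The hunks that follow turn the sorted particle array into per-leaf bookkeeping: consecutive particles with the same Morton index are collapsed into a single (morton, number_of_particles) entry, which is what localLeafInfo holds. A small sketch of that compression step (leaf_info_type mimics the struct declared earlier in this header; the Morton computation itself is omitted):

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

struct leaf_info_type
{
    std::int64_t morton{};
    std::size_t number_of_particles{};
};

// Collapse a sorted list of per-particle Morton indices into one entry per leaf.
std::vector<leaf_info_type> compress(std::vector<std::int64_t> const& particle_morton)
{
    std::vector<leaf_info_type> leaves;
    for(auto m: particle_morton)
    {
        if(!leaves.empty() && leaves.back().morton == m)
        {
            ++leaves.back().number_of_particles;  // same leaf as the previous particle
        }
        else
        {
            leaves.push_back({m, 1});  // first particle of a new leaf
        }
    }
    return leaves;
}

int main()
{
    // Particles already sorted by Morton index, as step 1 of buildLetTree guarantees.
    for(auto const& l: compress({3, 3, 3, 7, 9, 9}))
    {
        std::cout << "leaf " << l.morton << " holds " << l.number_of_particles << " particle(s)\n";
    }
}
```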
@@ -761,6 +761,9 @@ namespace scalfmm::tree #ifdef SCALFMM_USE_MPI + // std::cout << cpp_tools::colors::red; + // io::print("rank(" + std::to_string(rank) + ") leafMortonIdx: ", leafMortonIdx); + // std::cout << cpp_tools::colors::reset << std::endl; /// End //////////////////////////////////////////////////////////////////////////////////////////// /// @@ -772,6 +775,7 @@ namespace scalfmm::tree { leafMortonIdx[i] = scalfmm::index::get_morton_index(particle_container[i].position(), box, leaf_level); } + // io::print("rank(" + std::to_string(rank) + ") --> leafMortonIdx: ", leafMortonIdx); // localLeafInfo contains information on leaves (morton, number of particles) own by th current process std::vector<tree::leaf_info_type<morton_type>> localLeafInfo(leafMortonIdx.size()); @@ -796,6 +800,8 @@ namespace scalfmm::tree } leafMortonIdx.resize(idx + 1); localLeafInfo.resize(leafMortonIdx.size()); + // io::print("rank(" + std::to_string(rank) + ") --> localLeafInfo: ", localLeafInfo); + // io::print("rank(" + std::to_string(rank) + ") --> leafMortonIdx: ", leafMortonIdx); //////////////////////////////////////////////////////////////////////////////////////// // Build the pointer of the tree with all parameters @@ -809,9 +815,17 @@ namespace scalfmm::tree auto ghostP2P_leafInfo = build_let_leaves(manager, box, leaf_level, localLeafInfo, particles_distrib, separation); + // io::print("rank(" + std::to_string(rank) + ") --> final ghostP2P_leafInfo: ", + // ghostP2P_leafInfo); io::print("rank(" + std::to_string(rank) + ") --> final localLeafInfo: ", + // localLeafInfo); localGroupTree.set_leaf_distribution(particles_distrib); + // std::cout << std::flush; + // std::cout << cpp_tools::colors::red; + // std::cout << "END LEAF LEVEL " << std::endl; + // std::cout << cpp_tools::colors::reset; + /// If the distribution is not the same for the leaf and the cell we redistribute the /// morton index according to the uniform distribution of morton index /// @@ -861,20 +875,33 @@ namespace scalfmm::tree // build all leaves between leaf_level - 1 and level_shared -1. 
// we use the maximum because if we don't share certain levels this number is <0 + // std::cout << "std::max(level_shared, int(localGroupTree.top_level())) " + // << std::max(level_shared, int(localGroupTree.top_level())) << std::endl; + // std::cout << " XXXXXXXXXX -> std::max(level_shared, int(localGroupTree.top_level() - 1))" + // << std::max(level_shared, int(localGroupTree.top_level() - 1)) << std::endl; + ; for(int level = leaf_level - 1; level >= localGroupTree.top_level(); --level) { + // std::cout << " $$$$$$$$$$$$$$$$$$$$$$$$$ level " << level << " $$$$$$$$$$$$$$$$$$$$$$$$$ " + // << std::endl; std::int64_t ghost_l2l_cell{-1}; // Get the distribution at the current level, the ghost cell involved in l2l operator // and the morton index of the existing cells at this level level_dist[level] = std::move(parallel::utils::build_upper_distribution( manager, dimension, level, leafMortonIdx, ghost_l2l_cell, level_dist[level + 1])); + // io::print("rank(" + std::to_string(rank) + ") MortonIdx(" + std::to_string(level) + "): ", + // leafMortonIdx); + // std::cout << " ghost_l2l_cell: " << ghost_l2l_cell << std::endl; // Set the distribution in tres tree localGroupTree.set_cell_distribution(level, level_dist[level]); // build the m2l ghost cells at this level auto ghost_cells_level = build_let_at_level(manager, box, level, leafMortonIdx, level_dist[level], separation); + // io::print("rank(" + std::to_string(rank) + ") level=" + std::to_string(level) + + // " --> final ghost_cells(m2l): ", + // ghost_cells_level); // build the m2m ghost cells at this level auto ghost_m2m_cells = build_ghost_m2m_let_at_level( @@ -884,8 +911,12 @@ namespace scalfmm::tree // Create the groupe of cells structure for this level localGroupTree.create_cells_at_level(level, leafMortonIdx, ghost_cells_level, ghost_m2m_cells, ghost_l2l_cell, level_dist[level][rank]); + // std::cout << " $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ " << std::endl + // << std::flush; } + // std::cout << " end loop\n" << std::flush; manager.get_communicator().barrier(); + // std::cout << " end barrier\n" << std::flush; } else #endif // SCALFMM_USE_MPI @@ -901,6 +932,7 @@ namespace scalfmm::tree localGroupTree.construct(particleMortonIndex); // then, we fill each leaf with its particles (the particle container is sorted ) localGroupTree.fill_leaves_with_particles(particle_container); + // localGroupTree.set_leaf_distribution(particles_distrib); localGroupTree.set_cell_distribution(leaf_level, leaves_distrib); @@ -913,8 +945,17 @@ namespace scalfmm::tree } } + // std::cout << cpp_tools::colors::red << std::endl << std::flush; + // std::cout << "set iterators \n" << std::flush << std::flush; localGroupTree.set_valid_iterators(true); + // std::cout << "begin fill_leaves_with_particles \n" << std::flush; localGroupTree.fill_leaves_with_particles(particle_container); + // std::cout << "end fill_leaves_with_particles \n" << std::flush; + + // std::cout << cpp_tools::colors::reset << std::endl; + // std::cout << cpp_tools::colors::green << " --> End let::group_let() " << cpp_tools::colors::reset + // << std::endl + // << std::flush; return localGroupTree; }
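For the coarser levels, the loop above repeatedly converts the Morton indices of the current level into those of the parent level before rebuilding the distribution and the ghost lists. A minimal sketch of that index reduction (build_upper_distribution does more work than this, since it also negotiates the boundaries between ranks, so only the index arithmetic is shown):

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

// Parents of a sorted set of Morton indices: drop `dimension` bits, then deduplicate.
std::vector<std::int64_t> parent_level(std::vector<std::int64_t> cells, std::size_t dimension)
{
    for(auto& c: cells)
    {
        c >>= dimension;
    }
    cells.erase(std::unique(cells.begin(), cells.end()), cells.end());  // sorted input => sorted parents
    return cells;
}

int main()
{
    constexpr std::size_t dimension = 3;  // octree
    std::vector<std::int64_t> cells_at_level_3{8, 9, 10, 15, 16, 23, 24};
    for(auto c: parent_level(cells_at_level_3, dimension))
    {
        std::cout << c << ' ';  // prints: 1 2 3
    }
    std::cout << '\n';
}
```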