Commit bcbc2895 authored by ESTERIE Pierre's avatar ESTERIE Pierre

Final simd p2m

parent 3c0ef569
......@@ -4,16 +4,16 @@
#include <chrono>
#include <functional>
#include <iostream>
#include <utility>
#include <random>
#include <tuple>
#include <utility>
#include <scalfmm/container/particle.hpp>
#include <scalfmm/container/particle_container.hpp>
#include <scalfmm/core/operators.hpp>
#include <scalfmm/interpolation/uniform.hpp>
#include <scalfmm/tools/colorized.hpp>
#include <scalfmm/tree/leaf.hpp>
#include <scalfmm/core/operators.hpp>
int main()
{
......@@ -21,11 +21,11 @@ int main()
namespace colors = scalfmm::colors;
constexpr size_type particle_dim{3};
//constexpr size_type potential_dim{3};
// constexpr size_type potential_dim{3};
constexpr size_type order{6};
using particle_type = scalfmm::container::particle<double, particle_dim, double, double, double>;
//using tuple_type = std::tuple<double,double,double>;
// using tuple_type = std::tuple<double,double,double>;
using point_type = scalfmm::container::point<double, particle_dim>;
using container_type = scalfmm::container::particle_container<particle_type>;
using multipole_type = scalfmm::container::variadic_container<double, double, double>;
......@@ -45,7 +45,7 @@ int main()
auto random_r = [&dis, &gen]() { return dis(gen); };
point_type center = {2.*width, 0., 0.};
point_type center = {2. * width, 0., 0.};
auto make_particle = [&center, &width, &random_r]() {
point_type position = {random_r() * width, random_r() * width, random_r() * width};
......@@ -67,12 +67,11 @@ int main()
// Construct leaf
leaf_type l(std::move(particles_source), center, width);
multipole_type multipoles(scalfmm::meta::pow(order,particle_dim), std::make_tuple(0.,0.,0.));
multipole_type multipoles(scalfmm::meta::pow(order, particle_dim), std::make_tuple(0., 0., 0.));
std::vector<std::chrono::microseconds> t;
for(std::size_t exp = 0; exp < 50; ++exp)
for(std::size_t exp = 0; exp < 200; ++exp)
{
//multipoles.clear();
start = std::chrono::high_resolution_clock::now();
scalfmm::operators::apply_p2m(s, order, l, multipoles);
end = std::chrono::high_resolution_clock::now();
......@@ -81,34 +80,31 @@ int main()
if(exp == 0)
{
std::ofstream file("dump.txt");
auto it = std::begin(multipoles);
for(int i = 0; i < multipoles.size(); ++i)
{
auto it = std::begin(multipoles);
file << std::fixed << std::setprecision(13) << std::get<0>(*it) << ' ';
file << std::fixed << std::setprecision(13) << std::get<0>(*it) << '\n';
it++;
}
it = std::begin(multipoles);
for(int i = 0; i < multipoles.size(); ++i)
{
auto it = std::begin(multipoles);
file << std::fixed << std::setprecision(13) << std::get<1>(*it) << ' ';
file << std::fixed << std::setprecision(13) << std::get<1>(*it) << '\n';
it++;
}
it = std::begin(multipoles);
for(int i = 0; i < multipoles.size(); ++i)
{
auto it = std::begin(multipoles);
file << std::fixed << std::setprecision(13) << std::get<2>(*it) << ' ';
file << std::fixed << std::setprecision(13) << std::get<2>(*it) << '\n';
it++;
}
file.close();
}
}
std::sort(t.begin(),t.end());
std::sort(t.begin(), t.end());
std::cout << colors::yellow << "p2m took... : "
<< t[t.size()/2].count() << "us\n"
<< colors::reset;
std::cout << colors::yellow << "p2m took... : " << t[t.size() / 2].count() << "us\n" << colors::reset;
return 0;
}
......@@ -5,11 +5,11 @@
#ifndef SCALFMM_CORE_OPERATORS_HPP
#define SCALFMM_CORE_OPERATORS_HPP
#include <tuple>
#include <algorithm>
#include <fstream>
#include <utility>
#include <string>
#include <tuple>
#include <utility>
#include <xsimd/xsimd.hpp>
#include <xtensor/xarray.hpp>
......@@ -17,6 +17,7 @@
#include <scalfmm/container/point.hpp>
#include <scalfmm/interpolation/mapping.hpp>
#include <scalfmm/meta/utils.hpp>
#include <scalfmm/tools/colorized.hpp>
namespace scalfmm::simd
{
......@@ -46,12 +47,12 @@ namespace scalfmm::simd
template<typename Particle, typename TuplePtr, std::size_t... Is>
constexpr inline auto load_attributes(TuplePtr const& particle_ptrs, std::index_sequence<Is...> s)
{
return std::make_tuple(xsimd::load_aligned(std::get<Particle::dimension+Is>(particle_ptrs))...);
return std::make_tuple(xsimd::load_aligned(std::get<Particle::dimension + Is>(particle_ptrs))...);
}
template<typename TupleAttributes, typename TuplePtr, std::size_t... Is>
constexpr inline void store_attributes(TuplePtr const& attributes_ptrs, TupleAttributes const& src,
std::index_sequence<Is...> s)
std::index_sequence<Is...> s)
{
noop_f{(xsimd::store_aligned(std::get<Is>(attributes_ptrs), std::get<Is>(src)), 0)...};
}
......@@ -63,12 +64,6 @@ namespace scalfmm::simd
return return_type{{f(std::get<Is>(std::forward<Container>(input)), std::forward<Types>(ts)...)...}};
}
template<typename Container, std::size_t... ISs>
constexpr inline auto generate_s(std::index_sequence<ISs...> s, Container const& cont,
std::array<std::size_t, sizeof...(ISs)> const& index)
{
return meta::multiply(std::get<ISs>(cont[index[ISs]])...);
}
} // namespace impl
template<typename Position, typename TuplePtr>
......@@ -92,7 +87,8 @@ namespace scalfmm::simd
template<typename TupleAttributes, typename TuplePtr>
constexpr inline void store_attributes(TuplePtr& attributes_ptrs, TupleAttributes const& src)
{
impl::store_attributes(attributes_ptrs, src, std::make_index_sequence<std::tuple_size<TupleAttributes>::value>{});
impl::store_attributes(attributes_ptrs, src,
std::make_index_sequence<std::tuple_size<TupleAttributes>::value>{});
}
template<std::size_t Size, typename F, typename Container, typename... Types>
......@@ -102,31 +98,40 @@ namespace scalfmm::simd
std::forward<Types>(ts)...);
}
template<std::size_t N, typename Container>
constexpr inline auto generate_s(Container const& cont, std::array<std::size_t, N> const& index)
{
return impl::generate_s(std::make_index_sequence<N>{}, cont, index);
}
} // namespace scalfmm::simd
namespace scalfmm::utils
{
namespace impl
{
template<typename Container, std::size_t... ISs>
constexpr inline auto generate_s(std::index_sequence<ISs...> s, Container const& cont,
std::array<std::size_t, sizeof...(ISs)> const& index)
{
return meta::multiply(std::get<ISs>(cont[index[ISs]])...);
}
template<typename T, typename U, std::size_t... Is>
inline constexpr auto tuple_update(T&& lhs, U&& rhs, std::index_sequence<Is...> s)
{
simd::impl::noop_f{((std::get<Is>(std::forward<T>(lhs))+=std::get<Is>(std::forward<U>(rhs))),0)...};
simd::impl::noop_f{((std::get<Is>(std::forward<T>(lhs)) += std::get<Is>(std::forward<U>(rhs))), 0)...};
}
} // namespace impl
template<std::size_t N, typename Container>
constexpr inline auto generate_s(Container const& cont, std::array<std::size_t, N> const& index)
{
return impl::generate_s(std::make_index_sequence<N>{}, cont, index);
}
template<typename T, typename U>
inline constexpr void tuple_update(T&& lhs, U&& rhs)
{
return impl::tuple_update(std::forward<T>(lhs), std::forward<U>(rhs), std::make_index_sequence<std::tuple_size<T>::value>{});
return impl::tuple_update(std::forward<T>(lhs), std::forward<U>(rhs),
std::make_index_sequence<std::tuple_size<T>::value>{});
}
}
} // namespace scalfmm::utils
namespace scalfmm::operators
{
......@@ -155,36 +160,28 @@ namespace scalfmm::operators
auto particle_iterator = leaf.begin();
constexpr std::size_t potential_size{particle_type::attributes_size};
std::size_t nnodes = meta::pow(order, position_type::dimension);
std::vector<simd_position_type> poly_of_part(order);
std::vector<value_type, XSIMD_DEFAULT_ALLOCATOR(value_type)> S(meta::pow(order, position_type::dimension) * inc);
std::vector<value_type, XSIMD_DEFAULT_ALLOCATOR(value_type)> S(nnodes * inc);
for(std::size_t part = 0; part < vec_size; part += inc)
{
//std::cout << "part_index = " << part << '\n';
simd_position_type part_position(simd::load_position<simd_position_type>(particle_iterator));
simd_position_type local_position{};
mapping_part_position(part_position, local_position);
//if(part == 0) std::cout << local_position << '\n';
std::size_t o = 0;
for(auto& p : poly_of_part)
for(std::size_t o = 0; o < order; ++o)
{
polynomials_f_type polynomials{};
p = simd::apply_f<simd_position_type::dimension>(polynomials, local_position, order, o);
//if(part == 0) std::cout << poly_of_part[o] << '\n';
o++;
poly_of_part[o] = simd::apply_f<simd_position_type::dimension>(polynomials, local_position, order, o);
}
// Assembling S
std::size_t idx = 0;
auto construct_s = [&S, &poly_of_part, &idx, inc](auto&... current_indices) {
//meta::print(std::cout, current_indices...);
//std::cout << '\n';
auto s_simd = simd::generate_s<position_type::dimension>(poly_of_part, {{current_indices...}});
//if(part == 0) std::cout << s_simd[0] << '\n';
auto construct_s = [&part, &S, &poly_of_part, &idx, inc](auto&... current_indices) {
auto s_simd = utils::generate_s<position_type::dimension>(poly_of_part, {{current_indices...}});
xsimd::store_aligned(&S[idx], s_simd);
idx += inc;
};
......@@ -194,35 +191,25 @@ namespace scalfmm::operators
meta::looper<position_type::dimension>()(construct_s, stops);
auto update_potential = [](auto pot, const value_type* current_s)
{
auto s_to_apply = xsimd::load_aligned(current_s);
return s_to_apply*pot;
};
std::size_t vec_idx = 0;
auto simd_potentials = simd::load_attributes<particle_type>(particle_iterator);
auto multipoles_iterator = expansion.begin();
for(std::size_t m = 0; m < expansion.size(); ++m)
{
auto s_to_apply = xsimd::load_aligned(&S[vec_idx]);
auto simd_potentials = simd::load_attributes<particle_type>(particle_iterator);
auto multipoles_iterator = expansion.begin();
auto update_potential = [&s_to_apply](auto... pot)
{
return std::make_tuple(s_to_apply*pot...);
};
auto update_potential = [&s_to_apply](auto... pot) { return std::make_tuple(s_to_apply * pot...); };
simd_potentials = std::apply(update_potential, simd_potentials);
auto to_update_multipoles = std::apply(update_potential, simd_potentials);
auto reduce_potentials = [](auto... pot)
{
return std::make_tuple(xsimd::hadd(pot)...);
auto reduce_update_multipoles = [](auto... simd_to_reduce) {
return std::make_tuple(xsimd::hadd(simd_to_reduce)...);
};
auto weight_to_apply = std::apply(reduce_potentials, simd_potentials);
auto weight_to_apply = std::apply(reduce_update_multipoles, to_update_multipoles);
utils::tuple_update(*multipoles_iterator,weight_to_apply);
utils::tuple_update(*multipoles_iterator, weight_to_apply);
multipoles_iterator++;
vec_idx += inc;
......@@ -230,7 +217,7 @@ namespace scalfmm::operators
poly_of_part.clear();
S.clear();
particle_iterator++;
particle_iterator += inc;
}
}
......
......@@ -15,38 +15,36 @@
#include <scalfmm/utils/math.hpp>
#include <xsimd/xsimd.hpp>
#include <xtensor/xtensor.hpp>
#include <xtensor/xarray.hpp>
#include <xtensor/xtensor.hpp>
namespace scalfmm::interpolation
{
namespace impl
{
template<typename T, std::size_t N>
inline void compare(xsimd::batch<T,N>& x)
inline void compare(xsimd::batch<T, N>& x)
{
using batch_type = xsimd::batch<T,N>;
using batch_type = xsimd::batch<T, N>;
auto booleans = xsimd::abs(x) > batch_type(1.);
if(xsimd::any(booleans))
{
x = xsimd::select(x>batch_type(1.0), batch_type(1.), x);
x = xsimd::select(x<batch_type(-1.0), batch_type(-1.), x);
x = xsimd::select(x > batch_type(1.0), batch_type(1.), x);
x = xsimd::select(x < batch_type(-1.0), batch_type(-1.), x);
}
}
template<typename T>
inline auto compare(T& x)
-> std::enable_if_t<std::is_scalar<T>::value>
inline auto compare(T& x) -> std::enable_if_t<std::is_scalar<T>::value>
{
if (std::abs(x)>1.)
if(std::abs(x) > 1.)
{
x = (x > T( 1.) ? T( 1.) : x);
x = (x > T(1.) ? T(1.) : x);
x = (x < T(-1.) ? T(-1.) : x);
}
}
}
} // namespace impl
template<typename T>
struct uniform_roots : roots_callable<T>
......@@ -62,7 +60,7 @@ namespace scalfmm::interpolation
inline value_type operator()(value_type x, const std::size_t order, const std::size_t n) override
{
//assert(xsimd::any(xsimd::abs(x) - 1. < 10. * std::numeric_limits<value_type>::epsilon()));
// assert(xsimd::any(xsimd::abs(x) - 1. < 10. * std::numeric_limits<value_type>::epsilon()));
impl::compare(x);
......@@ -96,7 +94,7 @@ namespace scalfmm::interpolation
// new version (reducing round-off)
// regular grid on [-1,1] (h simplifies, only the size of the domain and a remains i.e. 2. and -1.)
L *= (value_type(int(order) - 1) * (x + value_type(1.)) - (value_type(2.) * value_type(m)));
L *= ((value_type(int(order) - 1) * (x + value_type(1.))) - (value_type(2.) * value_type(m)));
}
}
......@@ -120,7 +118,6 @@ namespace scalfmm::interpolation
template<typename T>
using polynomials_f_type = uniform_polynomials<T>;
explicit uniform_interpolator(size_type order, size_type tree_height = 3,
value_type root_cell_width = value_type(1.),
value_type cell_width_extension = value_type(0.))
......
......@@ -19,6 +19,8 @@ namespace scalfmm::meta
template<typename... Args>
constexpr inline auto multiply(Args&... args)
{
//print(std::cout, args...);
//std::cout << '\n';
return (args * ...);
}
......@@ -29,6 +31,7 @@ namespace scalfmm::meta
template<typename F, std::size_t Dimension, typename... Is>
constexpr inline void operator()(F&& f, std::array<std::size_t,Dimension> const& stops, Is& ... is)
{
//print(std::cout, is...); std::cout << '\n';
for(std::size_t i=0; i<std::get<N-1>(stops); ++i)
{
looper<N-1>()(std::forward<F>(f), stops, i, is...);
......@@ -42,9 +45,11 @@ namespace scalfmm::meta
template<typename F, std::size_t Dimension, typename... Is>
constexpr inline void operator()(F&& f, std::array<std::size_t,Dimension> const& stops, Is& ... is)
{
//print(std::cout, is...); std::cout << '\n';
for(std::size_t i=0; i<std::get<0>(stops); ++i)
{
f(i,is...);
f(i, is...);
}
}
};
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment