Commit c8fd3a82 authored by ESTERIE Pierre's avatar ESTERIE Pierre
Browse files

some cleaning in operator + fftw wrapper + complex product

parent 6e85e889
......@@ -10,6 +10,7 @@
#include <iterator>
#include <tuple>
#include <utility>
namespace scalfmm::algorithms::pass
{
......@@ -59,7 +60,7 @@ namespace scalfmm::algorithms::pass
assertm(leaf.csymbolics().morton_index == cell.symbolics().morton_index,
"Bottom pass : morton indexes of leaf and cell does not match.");
p2m(interpolator, leaf, cell, cell.order());
p2m(std::forward<Interpolator>(interpolator), leaf, cell, cell.order());
++leaf_begin;
++cell_begin;
......
......@@ -161,7 +161,7 @@ namespace scalfmm::interpolation
auto apply_m2l(Cell const& source_cell, Cell& target_cell, std::size_t neighbor_idx, std::size_t order,
std::size_t tree_level, [[maybe_unused]] buffer_type& products) const -> void
{
if(homogeneity_tag == matrix_kernels::homogeneity::homogenous)
if constexpr (homogeneity_tag == matrix_kernels::homogeneity::homogenous)
{
return this->derived_cast().template apply_m2l_impl<Cell>(
source_cell, target_cell, m_fc.at(neighbor_idx), m_far_field.scale_factor(target_cell.width()), order,
......
......@@ -16,7 +16,6 @@
#include "scalfmm/simd/memory.hpp"
#include "scalfmm/simd/utils.hpp"
#include "scalfmm/tools/colorized.hpp"
#include "scalfmm/utils/fftw.hpp"
#include "scalfmm/utils/math.hpp"
#include "scalfmm/utils/tensor.hpp"
......@@ -51,6 +50,7 @@
#include <xtl/xclosure.hpp>
#include <xtl/xcomplex.hpp>
namespace scalfmm::interpolation
{
template<typename ValueType, std::size_t Dimension, typename FarFieldMatrixKernel>
......@@ -183,6 +183,7 @@ namespace scalfmm::interpolation
{
std::vector<std::size_t> shape(dimension, 2 * this->order() - 1);
shape.at(dimension - 1) = this->order();
return buffer_type(buffer_shape_type{}, buffer_inner_type(shape, 0.));
}
......@@ -193,17 +194,13 @@ namespace scalfmm::interpolation
using fft_type = container::get_variadic_adaptor_t<xt::xarray<typename multipoles_container::value_type>,
Cell::inputs_size>;
auto const& multipoles = current_cell.cmultipoles();
auto& mtilde = current_cell.transformed_multipoles();
std::vector shape_nnodes(Cell::dimension, order);
std::vector shape_transformed(Cell::dimension, std::size_t(2 * order - 1));
// Resize buffers to store fft results
for(std::size_t m = 0; m < km; ++m)
{
mtilde.at(m).resize(shape_nnodes);
}
std::vector shape_transformed(dimension, std::size_t(2 * order - 1));
std::vector<std::size_t> shape_out(shape_transformed);
shape_out.at(dimension - 1) = this->order();
fft_type padded_tensors;
meta::for_each(padded_tensors, [&shape_transformed](auto& t) { t.resize(shape_transformed); });
......@@ -216,6 +213,15 @@ namespace scalfmm::interpolation
views_in_padded = meta::apply(multipoles, [](auto const& r) { return r; });
// Resize buffers to store fft results
for(std::size_t m = 0; m < km; ++m)
{
mtilde.at(m).resize(shape_out);
}
//meta::repeat_indexed(
// [&mtilde, this](std::size_t m, auto const& r) { fft.execute_plan(r, mtilde.at(m)); },
// padded_tensors);
meta::repeat_indexed(
[&mtilde](std::size_t m, auto const& r) { mtilde.at(m) = xt::fftw::rfftn<dimension>(r); },
padded_tensors);
......@@ -232,7 +238,7 @@ namespace scalfmm::interpolation
{
meta::repeat_indexed(
[&k, &products, &transformed_multipoles, m](std::size_t n, auto const& s_f) {
products.at(n) += transformed_multipoles.at(m) * k.at(n, m) * s_f;
tensor::product(products.at(n), transformed_multipoles.at(m), k.at(n,m), s_f);
},
scale_factor);
}
......@@ -246,7 +252,7 @@ namespace scalfmm::interpolation
auto& target_expansion = current_cell.locals();
meta::repeat_indexed(
[&products, order](std::size_t n, auto& expansion) {
[&products, order, this](std::size_t n, auto& expansion) {
auto view_for_update = xt::eval(tensor::get_view<dimension>(
xt::fftw::irfftn<dimension>(products.at(n), true), xt::range(0, order)));
expansion += view_for_update;
......
......@@ -83,6 +83,9 @@ namespace scalfmm::operators
auto call_polynomials = [&interp](auto x, std::size_t o, std::size_t n) { return interp.polynomials(x, o, n); };
std::array<std::size_t, position_type::dimension> stops{};
stops.fill(order);
// here, we process simd_size (i.e. inc) particles at each loop turn
for(std::size_t part = 0; part < vec_size; part += inc)
{
......@@ -110,9 +113,6 @@ namespace scalfmm::operators
idx += inc;
};
std::array<std::size_t, position_type::dimension> stops{};
stops.fill(order);
// expand N loop over ORDER
meta::looper<position_type::dimension>()(construct_s, stops);
......@@ -179,9 +179,6 @@ namespace scalfmm::operators
idx++;
};
std::array<std::size_t, position_type::dimension> stops{};
stops.fill(order);
meta::looper<position_type::dimension>()(construct_s, stops);
auto locals_iterator = local_expansion.cbegin();
......
......@@ -44,7 +44,6 @@ namespace scalfmm::operators
const std::size_t inc = simd_type::size;
// number of particles in the leaf
const std::size_t leaf_size{source_leaf.size()};
// vectorizable particles
const std::size_t vec_size = leaf_size - leaf_size % inc;
......@@ -63,6 +62,9 @@ namespace scalfmm::operators
// Resulting S, also in simd
std::vector<value_type, XSIMD_DEFAULT_ALLOCATOR(value_type)> S(nnodes * inc);
std::array<std::size_t, position_type::dimension> stops{};
stops.fill(order);
auto call_polynomials = [&interp](auto x, std::size_t o, std::size_t n) { return interp.polynomials(x, o, n); };
// here, we process simd_size (i.e. inc) particles at each loop turn
for(std::size_t part = 0; part < vec_size; part += inc)
......@@ -91,8 +93,6 @@ namespace scalfmm::operators
idx += inc;
};
std::array<std::size_t, position_type::dimension> stops{};
stops.fill(order);
// expand N loop over ORDER
meta::looper<position_type::dimension>()(construct_s, stops);
......@@ -120,8 +120,6 @@ namespace scalfmm::operators
vec_idx += inc;
}
poly_of_part.clear();
S.clear();
position_iterator += inc;
inputs_iterator += inc;
}
......@@ -151,7 +149,6 @@ namespace scalfmm::operators
for(std::size_t o = 0; o < order; ++o)
{
// polynomials_f_type_scal polynomials{};
poly_of_part_scal[o] =
simd::apply_f<position_type::dimension>(call_polynomials, local_position, order, o);
}
......@@ -164,12 +161,8 @@ namespace scalfmm::operators
idx++;
};
std::array<std::size_t, position_type::dimension> stops{};
stops.fill(order);
meta::looper<position_type::dimension>()(construct_s, stops);
auto physical_value = simd::load_tuple<value_type>(inputs_iterator);
auto multipoles_iterator = multipoles_expansion.begin();
for(std::size_t m = 0; m < nnodes; ++m)
......@@ -177,14 +170,12 @@ namespace scalfmm::operators
auto s_to_apply = S_scal[m];
auto update_potential = [&s_to_apply](auto... pot) { return std::make_tuple(s_to_apply * pot...); };
auto weight_to_apply = std::apply(update_potential, physical_value);
auto weight_to_apply = std::apply(update_potential, *inputs_iterator);
meta::tuple_sum_update(*multipoles_iterator, weight_to_apply);
++multipoles_iterator;
}
poly_of_part_scal.clear();
S_scal.clear();
++position_iterator;
++inputs_iterator;
}
......
......@@ -16,97 +16,134 @@
namespace scalfmm::fftw
{
inline std::mutex& fftw_global_mutex()
{
static std::mutex m;
return m;
}
template<typename ValueType>
struct fft;
template<std::size_t dim>
inline auto create_inverse_plan(xt::xarray<xtl::xcomplex<double>> const& input, xt::xarray<double>& output,
unsigned int flags, bool odd_last_dim = false)
template<>
struct fft<double>
{
using fftw_input_t = fftw_complex;
using fftw_output_t = double;
inline std::mutex& fftw_global_mutex()
{
static std::mutex m;
return m;
}
auto dft_dimensions_unsigned = xt::fftw::dft_dimensions_from_output(output, false, odd_last_dim);
std::vector<int> dft_dimensions;
dft_dimensions.reserve(dft_dimensions_unsigned.size());
std::transform(dft_dimensions_unsigned.begin(), dft_dimensions_unsigned.end(),
std::back_inserter(dft_dimensions), [&](std::size_t d) { return static_cast<int>(d); });
template<std::size_t dim>
inline auto create_inverse_plan(xt::xarray<std::complex<double>> const& input, xt::xarray<double>& output,
unsigned int flags, bool odd_last_dim = false) -> double
{
using fftw_input_t = fftw_complex;
using fftw_output_t = double;
auto N_dft = static_cast<fftw_output_t>(std::accumulate(dft_dimensions.begin(), dft_dimensions.end(),
static_cast<size_t>(1u), std::multiplies<size_t>()));
// std::cout << "dft unsigned : ";
// for(auto e : dft_dimensions_unsigned){ std::cout << e << ' '; }
// std::cout << '\n';
// std::cout << "dim : " << dim << '\n';
// std::cout << "dft : ";
// for(auto e : dft_dimensions){ std::cout << e << ' '; }
// std::cout << '\n';
auto dft_dimensions_unsigned = xt::fftw::dft_dimensions_from_output(output, false, odd_last_dim);
std::vector<int> dft_dimensions;
dft_dimensions.reserve(dft_dimensions_unsigned.size());
std::transform(dft_dimensions_unsigned.begin(), dft_dimensions_unsigned.end(),
std::back_inserter(dft_dimensions), [&](std::size_t d) { return static_cast<int>(d); });
std::lock_guard<std::mutex> guard(fftw_global_mutex());
auto plan = fftw_plan_dft_c2r(static_cast<int>(dim), dft_dimensions.data(),
const_cast<fftw_input_t*>(reinterpret_cast<const fftw_input_t*>(input.data())),
reinterpret_cast<fftw_output_t*>(output.data()), flags);
auto N_dft = static_cast<fftw_output_t>(std::accumulate(
dft_dimensions.begin(), dft_dimensions.end(), static_cast<size_t>(1u), std::multiplies<size_t>()));
// std::cout << "dft unsigned : ";
// for(auto e : dft_dimensions_unsigned){ std::cout << e << ' '; }
// std::cout << '\n';
// std::cout << "dim : " << dim << '\n';
// std::cout << "dft : ";
// for(auto e : dft_dimensions){ std::cout << e << ' '; }
// std::cout << '\n';
if(plan == nullptr)
{
XTENSOR_FFTW_THROW(
std::runtime_error,
"Plan creation returned nullptr. This usually means FFTW cannot create a plan for the given arguments "
"(e.g. a non-destructive multi-dimensional real FFT is impossible in FFTW).");
std::lock_guard<std::mutex> guard(fftw_global_mutex());
plan_inv =
fftw_plan_dft_c2r(static_cast<int>(dim), dft_dimensions.data(),
const_cast<fftw_input_t*>(reinterpret_cast<const fftw_input_t*>(input.data())),
reinterpret_cast<fftw_output_t*>(output.data()), flags);
if(plan == nullptr)
{
XTENSOR_FFTW_THROW(std::runtime_error,
"Plan creation returned nullptr. This usually means FFTW cannot create a plan for "
"the given arguments "
"(e.g. a non-destructive multi-dimensional real FFT is impossible in FFTW).");
}
plan_inv_exists = true;
return N_dft;
}
return std::make_tuple(plan, N_dft);
}
template<std::size_t dim>
inline auto create_plan(xt::xarray<double> const& input, xt::xarray<xtl::xcomplex<double>>& output,
unsigned int flags)
{
using fftw_input_t = double;
using fftw_output_t = fftw_complex;
template<std::size_t dim>
inline auto create_plan(xt::xarray<double> const& input,
xt::xarray<std::complex<double>>& output, unsigned int flags) -> void
{
using fftw_input_t = double;
using fftw_output_t = fftw_complex;
bool odd_last_dim = (input.shape()[input.shape().size() - 1] % 2 != 0);
bool odd_last_dim = (input.shape()[input.shape().size() - 1] % 2 != 0);
auto dft_dimensions_unsigned = xt::fftw::dft_dimensions_from_output(output, false, odd_last_dim);
std::vector<int> dft_dimensions;
dft_dimensions.reserve(dft_dimensions_unsigned.size());
std::transform(dft_dimensions_unsigned.begin(), dft_dimensions_unsigned.end(),
std::back_inserter(dft_dimensions), [&](std::size_t d) { return static_cast<int>(d); });
auto dft_dimensions_unsigned = xt::fftw::dft_dimensions_from_output(output, false, odd_last_dim);
std::vector<int> dft_dimensions;
dft_dimensions.reserve(dft_dimensions_unsigned.size());
std::transform(dft_dimensions_unsigned.begin(), dft_dimensions_unsigned.end(),
std::back_inserter(dft_dimensions), [&](std::size_t d) { return static_cast<int>(d); });
std::lock_guard<std::mutex> guard(fftw_global_mutex());
auto plan = fftw_plan_dft_r2c(static_cast<int>(dim), dft_dimensions.data(),
//std::cout << "dft unsigned : ";
//for(auto e: dft_dimensions_unsigned)
//{
// std::cout << e << ' ';
//}
//std::cout << '\n';
//std::cout << "dim : " << dim << '\n';
//std::cout << "dft : ";
//for(auto e: dft_dimensions)
//{
// std::cout << e << ' ';
//}
//std::cout << '\n';
std::lock_guard<std::mutex> guard(fftw_global_mutex());
plan = fftw_plan_dft_r2c(static_cast<int>(dim), dft_dimensions.data(),
const_cast<fftw_input_t*>(reinterpret_cast<const fftw_input_t*>(input.data())),
reinterpret_cast<fftw_output_t*>(output.data()), flags);
if(plan == nullptr)
{
XTENSOR_FFTW_THROW(
std::runtime_error,
"Plan creation returned nullptr. This usually means FFTW cannot create a plan for the given arguments "
"(e.g. a non-destructive multi-dimensional real FFT is impossible in FFTW).");
if(plan == nullptr)
{
XTENSOR_FFTW_THROW(std::runtime_error,
"Plan creation returned nullptr. This usually means FFTW cannot create a plan for "
"the given arguments "
"(e.g. a non-destructive multi-dimensional real FFT is impossible in FFTW).");
}
std::cout << "plan created\n";
plan_exists = true;
}
return plan;
}
inline auto execute_plan(fftw_plan plan) -> void
{
if(plan == nullptr)
inline auto execute_plan(xt::xarray<double> input, xt::xarray<std::complex<double>>& output) const
-> void
{
XTENSOR_FFTW_THROW(
std::runtime_error,
"Plan creation returned nullptr. This usually means FFTW cannot create a plan for the given "
"arguments (e.g. a non-destructive multi-dimensional real FFT is impossible in FFTW).");
using fftw_input_t = double;
using fftw_output_t = fftw_complex;
if(plan == nullptr)
{
XTENSOR_FFTW_THROW(
std::runtime_error,
"Plan creation returned nullptr. This usually means FFTW cannot create a plan for the given "
"arguments (e.g. a non-destructive multi-dimensional real FFT is impossible in FFTW).");
}
fftw_execute_dft_r2c(plan, const_cast<fftw_input_t*>(reinterpret_cast<const fftw_input_t*>(input.data())),
reinterpret_cast<fftw_output_t*>(output.data()));
}
fftw_execute(plan);
}
auto destroy_plan(fftw_plan plan)
~fft() {}
fftw_plan plan;
fftw_plan plan_inv;
bool plan_exists{false};
bool plan_inv_exists{false};
};
template<typename ValueType>
struct fftw_traits;
template<>
struct fftw_traits<double>
{
std::lock_guard<std::mutex> guard(fftw_global_mutex());
fftw_destroy_plan(plan);
}
using fftw_plan_type = fftw_plan;
};
// template<typename T, typename U,
// typename F = std::conditional_t<(std::is_same_v<T, double> || std::is_same_v<T,
......
......@@ -17,10 +17,12 @@
#include <fftw3.h>
#include <utility>
#include <xsimd/memory/xsimd_load_store.hpp>
#include <xtensor/xadapt.hpp>
#include <xtensor/xarray.hpp>
#include <xtensor/xbuilder.hpp>
#include <xtensor/xmanipulation.hpp>
#include <xtensor/xmath.hpp>
#include <xtensor/xpad.hpp>
#include <xtensor/xstrided_view.hpp>
#include <xtensor/xtensor_forward.hpp>
......@@ -210,6 +212,35 @@ namespace scalfmm::tensor
std::forward<result_of_reshaped>(xt::reshape_view(std::forward<Matrix>(m), std::move(new_shape))), 0, axe);
}
template<typename ValueType, typename ScalarType>
inline auto product(xt::xarray<ValueType>& accumulate, xt::xarray<ValueType> const& t,
xt::xarray<ValueType> const& k, ScalarType scalar)
{
using value_type = ValueType;
using simd_type = xsimd::simd_type<value_type>;
constexpr std::size_t inc = simd_type::size;
const std::size_t size = accumulate.size();
const std::size_t vec_size = size - size % inc;
//simd_type acc(0.);
value_type scale_cp(scalar);
simd_type splat(scale_cp);
for(std::size_t i{0}; i<vec_size; i+=inc)
{
auto t_ = xsimd::load_aligned(&t.data()[i]);
auto k_ = xsimd::load_aligned(&k.data()[i]);
auto acc_ = xsimd::load_aligned(&accumulate.data()[i]);
auto times = k_*splat;
//acc_ += t_ * k_ * splat;
auto tmp = xsimd::fma(t_, times, acc_);
xsimd::store_aligned(&accumulate.data()[i], tmp);
}
for(std::size_t i{vec_size}; i<size; ++i)
{
accumulate.data()[i] += t.data()[i]*k.data()[i]*scale_cp; //, accumulate.data()[i])
//accumulate.data()[i] = xsimd::fma(t.data()[i],k.data()[i]*scale_cp, accumulate.data()[i]);
}
}
} // namespace scalfmm::tensor
#endif // SCALFMM_UTILS_TENSOR_HPP
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment