Commit 39d34cc8 authored by Quentin Khan's avatar Quentin Khan

Update the adaptive FMM to the new kernel data layout

 - Use the multipole_t, local_expansion_t and symbolic_data_t type
   definitions from the kernel data.

 - Adapt the FMM operators calls to use the new interface.

 - In many places, new lists have to be created from the original cell
   list to get the specific parts of the data we need. The impact on
   performance has yet to be studied.

 - Simplify/shorten code using the above type definitions. Especially
   for the StarPU implementation.

 - A default symbolic_data_t type is provided by the FNode class when
   the kernel data does not provide it (which it only should for passing
   custom symbolic data).

 - Update the UnifFlopsKernel to use the new layout.
parent aafafbd1
......@@ -14,16 +14,12 @@
#include "Kernels/FKernelConcepts.hpp"
template<class _Tree, class _Kernel//,
// typename std::enable_if<scalfmm::sfinae::has_P2M<_Tree, _Kernel>::value>::type* = nullptr,
// typename std::enable_if<scalfmm::sfinae::has_M2M<_Tree, _Kernel>::value>::type* = nullptr,
// typename std::enable_if<scalfmm::sfinae::has_M2L<_Tree, _Kernel>::value>::type* = nullptr,
// typename std::enable_if<scalfmm::sfinae::has_L2L<_Tree, _Kernel>::value>::type* = nullptr,
// typename std::enable_if<scalfmm::sfinae::has_L2P<_Tree, _Kernel>::value>::type* = nullptr,
// typename std::enable_if<scalfmm::sfinae::has_P2P<_Tree, _Kernel>::value>::type* = nullptr,
// typename std::enable_if<scalfmm::sfinae::has_M2P<_Tree, _Kernel>::value>::type* = nullptr,
// typename std::enable_if<scalfmm::sfinae::has_P2L<_Tree, _Kernel>::value>::type* = nullptr
>
template<
class _Tree, class _Kernel,
class = inria::require<
scalfmm::meta::adaptive_compatible<_Tree,_Kernel>
>
>
class FAdaptiveSequential : public FAlgorithmInterface, public FAlgorithmTimers {
public:
using tree_t = _Tree;
......@@ -32,6 +28,10 @@ public:
using node_t = typename tree_t::node_t;
using multipole_t = typename node_t::data_t::multipole_t;
using local_expansion_t = typename node_t::data_t::local_expansion_t;
using symbolic_data_t = typename node_t::symbolic_data_t;
private:
tree_t& _tree;
kernel_t& _kernel;
......@@ -164,24 +164,37 @@ public:
void up_to_up() {
for(node_t& node : _tree.post_order_walk()) {
if(! node.is_leaf()) {
// Fill array of child data
std::array<multipole_t*, node_t::child_count> child_multipoles {};
std::transform(std::begin(node.getChildren()), std::end(node.getChildren()),
child_multipoles.begin(),
[](node_t* n) {
return n == nullptr ? nullptr
: &(n->getData()->getMultipoleData());
});
std::array<symbolic_data_t*, node_t::child_count> child_symbolics {};
std::transform(std::begin(node.getChildren()), std::end(node.getChildren()),
child_symbolics.begin(),
[](node_t* n) {
return n == nullptr ? nullptr
: &(n->getSymbolicData());
});
// Fill array of children data
typename node_t::data_t::multipole_t* child_data[node_t::child_count] = {};
for(node_t* child : node.getChildren()) {
child_data[child->getIndex() & (node_t::child_count-1)]
= &(child->getData()->getMultipoleData());
}
// Call kernel module
_kernel.M2M(&(node.getData()->getMultipoleData()),
child_data,
static_cast<int>(node.getDepth()));
&(node.getSymbolicData()),
child_multipoles.data(),
child_symbolics.data()
);
}
}
}
// M2L
void v_list_step() {
std::vector<typename node_t::data_t::multipole_t*> v_item_data_list;
std::vector<multipole_t*> v_item_multipoles;
std::vector<symbolic_data_t*> v_item_symbolics;
std::vector<int> v_item_indices;
for(node_t& node : _tree.in_order_walk()) {
......@@ -189,7 +202,8 @@ public:
continue;
}
v_item_data_list.clear();
v_item_multipoles.clear();
v_item_symbolics.clear();
v_item_indices.clear();
// Needed to compute offset between boxes
......@@ -199,15 +213,18 @@ public:
continue;
}
v_item_indices.push_back(compute_box_offset_index(&node, v_item, 3));
v_item_data_list.push_back(&(v_item->getData()->getMultipoleData()));
v_item_multipoles.push_back(&(v_item->getData()->getMultipoleData()));
v_item_symbolics.push_back(&(v_item->getSymbolicData()));
}
// Call kernel M2L operator
_kernel.M2L(&(node.getData()->getLocalExpansionData()),
v_item_data_list.data(),
&(node.getSymbolicData()),
v_item_multipoles.data(),
v_item_symbolics.data(),
v_item_indices.data(),
static_cast<int>(v_item_data_list.size()),
static_cast<int>(node.getDepth()));
static_cast<int>(v_item_multipoles.size())
);
}
}
......@@ -218,15 +235,17 @@ public:
*
* We loop over the leaves first to detect the empty ones early on.
*/
for(node_t* node : _tree.leaves()) {
if(node->getParticleContainer()->size() > 0) {
for(node_t* w_item : node->W) {
for(node_t* leaf : _tree.leaves()) {
if(leaf->getParticleContainer()->size() > 0) {
for(node_t* w_item : leaf->W) {
if(w_item->is_leaf() && w_item->getParticleContainer()->size() == 0) {
continue;
}
_kernel.P2L(&(w_item->getData()->getLocalExpansionData()),
&(w_item->getSymbolicData()),
node->getParticleContainer());
leaf->getParticleContainer(),
&(leaf->getSymbolicData())
);
}
}
}
......@@ -236,28 +255,45 @@ public:
void down_to_down() {
for(node_t& node : _tree.pre_order_walk()) {
if(! node.is_leaf()) {
typename node_t::data_t::local_expansion_t* child_data[node_t::child_count];
for(std::size_t i = 0; i < node_t::child_count; ++i) {
child_data[i] = &(node.getChild(i)->getData()->getLocalExpansionData());
}
std::array<local_expansion_t*, node_t::child_count> child_local_expansions {};
std::transform(std::begin(node.getChildren()), std::end(node.getChildren()),
child_local_expansions.begin(),
[](node_t* n) {
return n == nullptr ? nullptr
: &(n->getData()->getLocalExpansionData());
});
std::array<symbolic_data_t*, node_t::child_count> child_symbolics {};
std::transform(std::begin(node.getChildren()), std::end(node.getChildren()),
child_symbolics.begin(),
[](node_t* n) {
return n == nullptr ? nullptr
: &(n->getSymbolicData());
});
_kernel.L2L(&(node.getData()->getLocalExpansionData()),
child_data,
static_cast<int>(node.getDepth()));
&(node.getSymbolicData()),
child_local_expansions.data(),
child_symbolics.data()
);
}
}
}
// M2P
void w_list_step() {
for(node_t* node : _tree.leaves()) {
if(node->getParticleContainer()->size() > 0) {
for(node_t* w_item : node->W) {
for(node_t* leaf : _tree.leaves()) {
if(leaf->getParticleContainer()->size() > 0) {
for(node_t* w_item : leaf->W) {
if(w_item->is_leaf() && w_item->getParticleContainer()->size() == 0) {
continue;
}
_kernel.M2P(&(w_item->getData()->getMultipoleData()),
&(w_item->getSymbolicData()),
node->getParticleContainer());
leaf->getParticleContainer(),
&(leaf->getSymbolicData())
);
}
}
}
......
This diff is collapsed.
......@@ -23,15 +23,10 @@
#include "Kernels/FKernelConcepts.hpp"
template<class _Tree, class _Kernel// ,
// typename std::enable_if<scalfmm::sfinae::has_P2M<_Tree, _Kernel>::value>::type* = nullptr,
// typename std::enable_if<scalfmm::sfinae::has_M2M<_Tree, _Kernel>::value>::type* = nullptr,
// typename std::enable_if<scalfmm::sfinae::has_M2L<_Tree, _Kernel>::value>::type* = nullptr,
// typename std::enable_if<scalfmm::sfinae::has_L2L<_Tree, _Kernel>::value>::type* = nullptr,
// typename std::enable_if<scalfmm::sfinae::has_L2P<_Tree, _Kernel>::value>::type* = nullptr,
// typename std::enable_if<scalfmm::sfinae::has_partial_P2P<_Tree, _Kernel>::value>::type* = nullptr,
// typename std::enable_if<scalfmm::sfinae::has_M2P<_Tree, _Kernel>::value>::type* = nullptr,
// typename std::enable_if<scalfmm::sfinae::has_P2L<_Tree, _Kernel>::value>::type* = nullptr
template<class _Tree, class _Kernel,
class = inria::require<
scalfmm::meta::adaptive_compatible<_Tree,_Kernel>
>
>
class FAdaptiveTask : public FAlgorithmInterface, public FAlgorithmTimers {
public:
......@@ -41,6 +36,10 @@ public:
using node_t = typename tree_t::node_t;
using multipole_t = typename node_t::data_t::multipole_t;
using local_expansion_t = typename node_t::data_t::local_expansion_t;
using symbolic_data_t = typename node_t::symbolic_data_t;
protected:
/// Tree
tree_t& _tree;
......@@ -128,8 +127,8 @@ protected:
if(I == dep_t::M) {
return (const char*) node->getData();
} else if(I == dep_t::L) {
using type = typename std::decay<decltype((node->getDepth()))>::type;
return (const char*) const_cast<type*>(&(node->getDepth()));
using type = typename std::decay<decltype((node->getSymbolicData()))>::type;
return (const char*) const_cast<type*>(&(node->getSymbolicData()));
} else if(I == dep_t::P_s) {
return (const char*) node;
} else if(I == dep_t::P_t) {
......@@ -348,16 +347,29 @@ public:
{
const int thread_num = omp_get_thread_num();
// Fill array of children data
typename node_t::data_t::multipole_t* child_data[node_t::child_count] = {};
for(node_t* child : node->getChildren()) {
child_data[child->getIndex() & (node_t::child_count-1)]
= &(child->getData()->getMultipoleData());
}
std::array<multipole_t*, node_t::child_count> child_multipoles {};
std::transform(std::begin(node->getChildren()), std::end(node->getChildren()),
child_multipoles.begin(),
[](node_t* n) {
return n == nullptr ? nullptr
: &(n->getData()->getMultipoleData());
});
std::array<symbolic_data_t*, node_t::child_count> child_symbolics {};
std::transform(std::begin(node->getChildren()), std::end(node->getChildren()),
child_symbolics.begin(),
[](node_t* n) {
return n == nullptr ? nullptr
: &(n->getSymbolicData());
});
// Call kernel module
this->_kernels[thread_num]->M2M(&(node->getData()->getMultipoleData()),
child_data,
static_cast<int>(node->getDepth()));
this->_kernels[thread_num]->M2M(
&(node->getData()->getMultipoleData()),
&(node->getSymbolicData()),
child_multipoles.data(),
child_symbolics.data()
);
}
}
......@@ -753,8 +765,10 @@ public:
const int thread_num = omp_get_thread_num();
std::vector<typename node_t::data_t::multipole_t*> v_item_data_list;
std::vector<multipole_t*> v_item_multipoles;
std::vector<symbolic_data_t*> v_item_symbolics;
std::vector<int> v_item_indices;
// Needed to compute offset between boxes
for(node_t* v_item : node->V) {
if(v_item->is_leaf()
......@@ -762,16 +776,19 @@ public:
continue;
}
v_item_indices.push_back(compute_box_offset_index(node, v_item, 3));
v_item_data_list.push_back(&(v_item->getData()->getMultipoleData()));
v_item_multipoles.push_back(&(v_item->getData()->getMultipoleData()));
v_item_symbolics.push_back(&(v_item->getSymbolicData()));
}
// Call kernel M2L operator
this->_kernels[thread_num]->M2L(
&(node->getData()->getLocalExpansionData()),
v_item_data_list.data(),
&(node->getSymbolicData()),
v_item_multipoles.data(),
v_item_symbolics.data(),
v_item_indices.data(),
static_cast<int>(v_item_data_list.size()),
static_cast<int>(node->getDepth()));
static_cast<int>(v_item_multipoles.size())
);
}
}
......@@ -811,7 +828,9 @@ public:
this->_kernels[thread_num]->P2L(
&(w_item->getData()->getLocalExpansionData()),
&(w_item->getSymbolicData()),
leaf->getParticleContainer());
leaf->getParticleContainer(),
&(leaf->getSymbolicData())
);
}
}
}
......@@ -858,14 +877,28 @@ public:
{
const int thread_num = omp_get_thread_num();
typename node_t::data_t::local_expansion_t* child_data[node_t::child_count];
for(std::size_t i = 0; i < node_t::child_count; ++i) {
child_data[i] = &(node->getChild(i)->getData()->getLocalExpansionData());
}
this->_kernels[thread_num]->L2L(
&(node->getData()->getLocalExpansionData()),
child_data,
static_cast<int>(node->getDepth()));
std::array<local_expansion_t*, node_t::child_count> child_local_expansions {};
std::transform(std::begin(node->getChildren()), std::end(node->getChildren()),
child_local_expansions.begin(),
[](node_t* n) {
return n == nullptr ? nullptr
: &(n->getData()->getLocalExpansionData());
});
std::array<symbolic_data_t*, node_t::child_count> child_symbolics {};
std::transform(std::begin(node->getChildren()), std::end(node->getChildren()),
child_symbolics.begin(),
[](node_t* n) {
return n == nullptr ? nullptr
: &(n->getSymbolicData());
});
this->_kernels[thread_num]->L2L(
&(node->getData()->getLocalExpansionData()),
&(node->getSymbolicData()),
child_local_expansions.data(),
child_symbolics.data()
);
}
}
......@@ -901,7 +934,9 @@ public:
this->_kernels[thread_num]->M2P(
&(w_item->getData()->getMultipoleData()),
&(w_item->getSymbolicData()),
leaf->getParticleContainer());
leaf->getParticleContainer(),
&(leaf->getSymbolicData())
);
}
}
}
......
......@@ -14,12 +14,14 @@
#include <unordered_set>
#include "Utils/Contribs/inria/ostream_joiner.hpp"
#include "Utils/Contribs/inria/detection_idiom.hpp"
#include "FBox.hpp"
#include "Utils/FPoint.hpp"
#include "Utils/FConstFuncs.hpp"
#include "Utils/FOstreamTuple.hpp"
#include "Components/FSymbolicData.hpp"
#include "Components/FBasicCell.hpp"
namespace scalfmm {
......@@ -199,28 +201,36 @@ public:
/// Node data structure
using data_t = NodeData;
/** \brief Node structural data
* Keeps data about the node that may be read by kernels or algorithms.
*/
struct symbolic_data_t {
/// Node depth in its tree
std::size_t depth;
/// Node index in parent child array
std::size_t m_idx;
std::size_t getLevel() const {
return this->depth;
}
std::size_t getMortonIndex() const {
return this->m_idx;
}
FTreeCoordinate getCoordinate() const noexcept {
return FTreeCoordinate(this->m_idx);
}
friend std::ostream& operator<<(std::ostream& os, const symbolic_data_t& d) {
return (os << '{' << "\"depth\":" <<d.depth << ',' << "\"index\":" << d.m_idx << '}');
}
};
using symbolic_data_t
= typename extract_symbolic_data_t_or_fallback_to_default<FSymbolicData,data_t>::type;
static_assert(models_symbolic_data<symbolic_data_t>::value,
"The symbolic_data_t type does not model the required symbolic data interface.");
// struct symbolic_data_t {
// /// Node depth in its tree
// std::size_t depth;
// /// Node index in parent child array
// std::size_t m_idx;
// std::size_t getLevel() const {
// return this->depth;
// }
// std::size_t getMortonIndex() const {
// return this->m_idx;
// }
// FTreeCoordinate getCoordinate() const noexcept {
// return FTreeCoordinate(this->m_idx);
// }
// friend std::ostream& operator<<(std::ostream& os, const symbolic_data_t& d) {
// return (os << '{' << "\"depth\":" <<d.depth << ',' << "\"index\":" << d.m_idx << '}');
// }
// };
private:
......@@ -249,9 +259,7 @@ private:
bool _is_leaf = true;
/// Node data that may be of use to algorithms and kernels
struct symbolic_data_t _symbolic_data;
symbolic_data_t _symbolic_data;
public:
/// Near-field leaves interaction list
......@@ -272,7 +280,7 @@ public:
_parent(&parent),
_box (parent.getBox().center(), parent.getBox().corner(child_index) ),
_tree (parent._tree ),
_symbolic_data{parent.getDepth()+1, (parent.getIndex() << Dim) + child_index}
_symbolic_data{static_cast<int>(parent.getDepth()+1), (parent.getIndex() << Dim) + child_index}
{
if (child_index >= child_count) {
throw std::invalid_argument(std::string("Wrong child index in node contructor: got ")
......@@ -372,12 +380,12 @@ public:
}
/// Depth accessor
const std::size_t& getDepth() const noexcept {
std::size_t getDepth() const noexcept {
return _symbolic_data.depth;
}
/// Morton index accessor
const std::size_t& getIndex() const noexcept {
std::size_t getIndex() const noexcept {
return _symbolic_data.m_idx;
}
......
......@@ -84,7 +84,11 @@ struct FUnifFlopsKernel {
* \param leaf_data Leaf data
* \param source_particle_container Leaf particle container
*/
void P2M(CellClass* const /*leaf_data*/, const ContainerClass* const source_particle_container) {
template<class SymbolicData>
void P2M(typename CellClass::multipole_t* const,
const SymbolicData* const,
const ContainerClass* const source_particle_container)
{
std::size_t flops = source_particle_container->size() * (3 * Fpow(ORDER,3) + 3 * 3 * ORDER * (ORDER-1));
_flops[operators::P2M] += flops;
_flops[operators::ALL] += flops;
......@@ -100,9 +104,11 @@ struct FUnifFlopsKernel {
* \param child_data Array of pointer to children node data
* \param unnamed Unused, level of the parent
*/
void M2M(CellClass* const /*node_data*/,
const CellClass * const * const /*child_data*/,
const int /*level*/)
template<class SymbolicData>
void M2M(typename CellClass::multipole_t* const FRestrict,
const SymbolicData* const,
const typename CellClass::multipole_t*const FRestrict *const FRestrict,
const SymbolicData* const [])
{
for(std::size_t idx = 0 ; idx < child_count ; ++idx){
std::size_t flops = 3 * Fpow(ORDER,3) *(2*ORDER-1);
......@@ -126,11 +132,13 @@ struct FUnifFlopsKernel {
*
* \note All nodes are at the same level
*/
void M2L(CellClass* const /*node_data*/,
const CellClass* /*v_item_data*/[],
const int /*position*/[],
const int v_item_data_size,
const int /*level*/)
template<class SymbolicData>
void M2L(typename CellClass::local_expansion_t * const FRestrict ,
const SymbolicData* const ,
const typename CellClass::multipole_t * const FRestrict [],
const SymbolicData* const FRestrict [],
const int [],
const int v_item_data_size)
{
for(int idx = 0 ; idx < v_item_data_size ; ++idx){
std::size_t flops = Fpow(ORDER, 3);
......@@ -149,9 +157,11 @@ struct FUnifFlopsKernel {
* \param child_data Child node data pointer array
* \param unnamed Unused, level of the parent node
*/
void L2L(const CellClass* const /*node_data*/,
CellClass** const /*child_data*/,
const int /*level*/)
template<class SymbolicData>
void L2L(const typename CellClass::local_expansion_t * const FRestrict,
const SymbolicData* const,
typename CellClass::local_expansion_t * FRestrict * const FRestrict,
const SymbolicData* const [])
{
for(std::size_t idx = 0 ; idx < child_count ; ++idx){
std::size_t flops = 3 * Fpow(ORDER,3) *(2*ORDER-1);
......@@ -169,8 +179,11 @@ struct FUnifFlopsKernel {
* \param node_data Local developmet node data
* \param source_particle_container Particle container
*/
void P2L(CellClass* const /*node_data*/,
const ContainerClass* const source_particle_container)
template<class SymbolicData>
void P2L(typename CellClass::local_expansion_t* const,
const SymbolicData * const,
const ContainerClass* const source_particle_container,
const SymbolicData * const)
{
std::size_t flops = source_particle_container->size() * NVALS * _matrixFlopsKernel.evaluate();
_flops[operators::P2L] += flops;
......@@ -186,8 +199,11 @@ struct FUnifFlopsKernel {
* \param leaf_data Leaf node data
* \param target_particle_container Leaf particle container
*/
void L2P(const CellClass* const /*leaf_data*/,
ContainerClass* const target_particle_container) {
template<class SymbolicData>
void L2P(const typename CellClass::local_expansion_t* const,
const SymbolicData* const,
ContainerClass* const target_particle_container)
{
std::size_t flops = target_particle_container->size() * (4 * Fpow(ORDER,3) + 9 * ORDER * (ORDER-1));
_flops[operators::L2P] += flops;
_flops[operators::ALL] += flops;
......@@ -202,8 +218,12 @@ struct FUnifFlopsKernel {
* \param node_data Multipole node data
* \param target_particle_container Target particle container
*/
void M2P(const CellClass* const /*node_data*/,
ContainerClass* const target_particle_container) {
template<class SymbolicData>
void M2P(const typename CellClass::multipole_t* const,
const SymbolicData* const,
ContainerClass* const target_particle_container,
const SymbolicData * const)
{
std::size_t flops = target_particle_container->size() * NVALS * _matrixFlopsKernel.evaluateBlockAndDerivative();
_flops[operators::M2P] += flops;
_flops[operators::ALL] += flops;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment