Mise à jour terminée. Pour connaître les apports de la version 13.8.4 par rapport à notre ancienne version vous pouvez lire les "Release Notes" suivantes :
https://about.gitlab.com/releases/2021/02/11/security-release-gitlab-13-8-4-released/
https://about.gitlab.com/releases/2021/02/05/gitlab-13-8-3-released/

Commit 951430ff authored by Quentin Khan

FAdaptiveTask: refactor task dependency management

parent 25ac6e64
......@@ -64,12 +64,32 @@ protected:
std::list<std::array<char, bucket_size>> pool;
std::size_t mock_dependency_index = bucket_size;
template<typename T = node_t>
T* next() {
return (T*) (&(this->pool.back()[mock_dependency_index++]));
char* next() {
return (char*) (&(this->pool.back()[mock_dependency_index++]));
}
} mock_dep;
// Dependency identifiers, used as the template argument of get_dependency.
// The enumerators keep their implicit values 0..3; `size` is the number of
// identifiers and serves as the upper bound in get_dependency's static_assert.
struct dep_t {
    enum {
        M,    // get_dependency returns node->getData()
        L,    // get_dependency returns the address of node->getDepth()
        P_s,  // get_dependency returns the node itself
        P_t,  // get_dependency returns node->getParticleContainer()
        size  // count of the identifiers above, not a dependency itself
    };
};
/**
 * Returns the address used to name a node's dependency in the OpenMP
 * `depend` clauses of the task-spawning methods.
 *
 * All dependencies are exposed as `char*` so that real dependencies and the
 * mock ones produced by `mock_dep.next()` can share the same arrays.
 *
 * @tparam I  dependency identifier, one of the dep_t enumerators; values
 *            greater than or equal to dep_t::size are rejected at compile time
 * @param node  node whose dependency address is requested
 * @return for dep_t::M the result of node->getData(), for dep_t::L the
 *         address of the object returned by node->getDepth(), for dep_t::P_s
 *         the node itself, for dep_t::P_t the result of
 *         node->getParticleContainer(); nullptr is never returned for a valid
 *         identifier
 */
template<std::size_t I>
char* get_dependency(node_t* node) const {
    static_assert(I < dep_t::size, "Dependency identifier does not exist (template parameter I)");
    if(I == dep_t::M) {
        return (char*) node->getData();
    } else if(I == dep_t::L) {
        // getDepth() is expected to return a reference; strip cv-qualifiers
        // from its type so that the address can be handed out as a plain
        // (non-const) pointer.
        using type = typename std::remove_cv<
            typename std::decay<decltype((node->getDepth()))>::type>::type;
        return (char*) const_cast<type*>(&(node->getDepth()));
    } else if(I == dep_t::P_s) {
        return (char*) node;
    } else if(I == dep_t::P_t) {
        return (char*) node->getParticleContainer();
    }
    // Not reachable for identifiers accepted by the static_assert above, but
    // without it control could flow off the end of a non-void function
    // (undefined behaviour, and a -Wreturn-type warning).
    return nullptr;
}
public:
FAdaptiveTask(tree_t& tree, kernel_t& kernel) :
......@@ -120,12 +140,6 @@ public:
std::cout << "Task adaptive algorithm (" << omp_get_num_threads() << " threads)" << std::endl;
std::cout << "Master thread: " << omp_get_thread_num() << std::endl;
if(operations & FFmmP2P) {
// A. U-list, P2P
timer.time([this](){this->u_list_step();});
std::cout << " P2P: " << timer.last().count() << '\n';
}
if(operations & FFmmP2M) {
// 1. source to up, P2M
timer.time([this](){this->source_to_up();});
......@@ -138,6 +152,12 @@ public:
std::cout << " P2L: " << timer.last().count() << '\n';
}
if(operations & FFmmP2P) {
// A. U-list, P2P
timer.time([this](){this->u_list_step();});
std::cout << " P2P: " << timer.last().count() << '\n';
}
if(operations & FFmmM2M) {
// 2. up to up, M2M
timer.time([this](){this->up_to_up();});
......@@ -181,17 +201,18 @@ public:
// P2M step: iterates over every leaf returned by _tree.leaves() and spawns
// one OpenMP task per leaf; the task calls P2M on the kernel that belongs to
// the executing thread (indexed by omp_get_thread_num()).
//
// NOTE(review): this listing was extracted from a diff view, so the removed
// and the added lines appear one after the other (two `#pragma omp task`
// directives, two P2M calls). Only one of each pair is expected to exist in
// the actual file; confirm against the repository before relying on it.
void source_to_up() {
for(node_t* leaf : _tree.leaves()) {
// Variant 1: the leaf's data and particle container pointers are used
// directly as dependency addresses and copied into the task.
auto data = leaf->getData();
auto particle_container = leaf->getParticleContainer();
#pragma omp task firstprivate(leaf, data, particle_container) \
depend(inout: leaf[:1]) \
depend(out: data[:1])
// Variant 2: dependency addresses come from get_dependency; the leaf (P_s)
// is declared as read, its data (M) as read/written. The (void) casts
// presumably silence unused-variable warnings when the pragmas are ignored.
char* ps_dep = get_dependency<dep_t::P_s>(leaf);(void)ps_dep;
char* m_dep = get_dependency<dep_t::M>(leaf);(void)m_dep;
#pragma omp task firstprivate(leaf) \
depend(in: ps_dep[:1]) \
depend(inout: m_dep[:1])
{
// One kernel per thread: pick the one of the thread running the task.
const int thread_num = omp_get_thread_num();
_kernels[thread_num]->P2M(data, particle_container);
_kernels[thread_num]->P2M(leaf->getData(), leaf->getParticleContainer());
}
}
// No barrier here: the taskwait below is intentionally left disabled.
// #pragma omp taskwait
}
// M2M
......@@ -203,24 +224,25 @@ public:
// Setup task dependencies
// children data
typename node_t::data_t* children_data[node_t::child_count] = {};
char* children_dep[node_t::child_count] = {};
for(node_t* child : node->getChildren()) {
children_data[child->getIndex() & (node_t::child_count-1)] = child->getData();
children_dep[child->getIndex() & (node_t::child_count-1)]
= get_dependency<dep_t::M>(child);
}
// node data
typename node_t::data_t* data = node->getData();
char* parent_dep = get_dependency<dep_t::M>(node); (void) parent_dep;
#pragma omp task \
depend(in: \
children_data[0][:1], \
children_data[1][:1], \
children_data[2][:1], \
children_data[3][:1], \
children_data[4][:1], \
children_data[5][:1], \
children_data[6][:1], \
children_data[7][:1]) \
depend(out: data[:1])
children_dep[0][:1], \
children_dep[1][:1], \
children_dep[2][:1], \
children_dep[3][:1], \
children_dep[4][:1], \
children_dep[5][:1], \
children_dep[6][:1], \
children_dep[7][:1]) \
depend(out: parent_dep[:1])
{
const int thread_num = omp_get_thread_num();
......@@ -253,8 +275,8 @@ public:
// The array of dependencies, we know that there cannot be more than
// 7^Dim cells involved in a M2L, in 3D, this is 343
typename node_t::data_t* task_deps[343];
typename node_t::data_t* data = node->getData();
char* task_deps[343];
char* data_dep = get_dependency<dep_t::L>(node);(void) data_dep;
std::size_t idx_dep = 0;
// Add existing dependencies
for(node_t* v_item : node->V) {
......@@ -262,13 +284,13 @@ public:
&& v_item->getParticleContainer()->size() == 0) {
continue;
}
task_deps[idx_dep] = v_item->getData();
task_deps[idx_dep] = get_dependency<dep_t::M>(v_item);
++idx_dep;
}
// Add mock dependencies, these are generated on the fly and used
// only once, that way they can never stop a task from starting
while(idx_dep < 343) {
task_deps[idx_dep] = this->mock_dep.template next<typename node_t::data_t>();
task_deps[idx_dep] = this->mock_dep.next();
++idx_dep;
}
......@@ -617,7 +639,7 @@ public:
task_deps[340][:1], \
task_deps[341][:1], \
task_deps[342][:1] \
) depend(inout: data[:1]) firstprivate(node)
) depend(inout: data_dep[:1]) firstprivate(node)
{
const int thread_num = omp_get_thread_num();
......@@ -653,9 +675,10 @@ public:
if(leaf->getParticleContainer()->size() > 0) {
for(node_t* w_item : leaf->W) {
auto w_data = w_item->getData();
char* w_dep = get_dependency<dep_t::L>(w_item);(void)w_dep;
char* ps_dep = get_dependency<dep_t::P_s>(leaf);(void)ps_dep;
#pragma omp task depend(in: leaf[:1]) depend(inout: w_data[:1])
#pragma omp task firstprivate(leaf, w_item) depend(in: ps_dep[:1]) depend(inout: w_dep[:1])
{
const int thread_num = omp_get_thread_num();
this->_kernels[thread_num]->P2L(w_item->getData(), leaf->getParticleContainer());
......@@ -671,23 +694,23 @@ public:
for(node_t& n : _tree.pre_order_walk()) {
node_t* node = &n;
if(! node->is_leaf()) {
typename node_t::data_t* data = node->getData();
typename node_t::data_t* children_data[node_t::child_count];
char* parent_dep = get_dependency<dep_t::L>(node);(void)parent_dep;
char* children_dep[node_t::child_count];
for(std::size_t i = 0; i < node_t::child_count; ++i) {
children_data[i] = node->getChild(i)->getData();
children_dep[i] = get_dependency<dep_t::L>(node->getChild(i));
}
#pragma omp task \
depend(in: data[:1]) \
depend(inout: \
children_data[0][:1], \
children_data[1][:1], \
children_data[2][:1], \
children_data[3][:1], \
children_data[4][:1], \
children_data[5][:1], \
children_data[6][:1], \
children_data[7][:1] \
#pragma omp task \
depend(in: parent_dep[:1]) \
depend(inout: \
children_dep[0][:1], \
children_dep[1][:1], \
children_dep[2][:1], \
children_dep[3][:1], \
children_dep[4][:1], \
children_dep[5][:1], \
children_dep[6][:1], \
children_dep[7][:1] \
)
{
const int thread_num = omp_get_thread_num();
......@@ -708,10 +731,10 @@ public:
for(node_t* leaf : _tree.leaves()) {
if(leaf->getParticleContainer()->size() > 0) {
for(node_t* w_item : leaf->W) {
auto w_data = w_item->getData();
auto particle_container = leaf->getParticleContainer();
char* w_dep = get_dependency<dep_t::M>(w_item); (void)w_dep;
char* pt_dep = get_dependency<dep_t::P_t>(leaf); (void)pt_dep;
#pragma omp task depend(inout: particle_container[:1]) depend(in: w_data[:1])
#pragma omp task depend(inout: pt_dep[:1]) depend(in: w_dep[:1])
{
const int thread_num = omp_get_thread_num();
this->_kernels[thread_num]->M2P(w_item->getData(), leaf->getParticleContainer());
......@@ -725,9 +748,9 @@ public:
// L2P
void down_to_target() {
for(node_t* leaf : _tree.leaves()) {
auto data = leaf->getData();
auto particle_container = leaf->getParticleContainer();
#pragma omp task depend(inout: particle_container[:1]) depend(in: data[:1])
char* data_dep = get_dependency<dep_t::L>(leaf); (void)data_dep;
char* pt_dep = get_dependency<dep_t::P_t>(leaf); (void)pt_dep;
#pragma omp task depend(inout: pt_dep[:1]) depend(in: data_dep[:1])
{
const int thread_num = omp_get_thread_num();
this->_kernels[thread_num]->L2P(leaf->getData(), leaf->getParticleContainer());
......@@ -788,28 +811,33 @@ public:
while(it != leaf->U.end()) {
constexpr const std::size_t max_task_size = 27;
std::size_t i = 0;
container_t* task_deps[max_task_size];
auto first = it;
char* task_deps[max_task_size];
char* leaf_s_dep = get_dependency<dep_t::P_s>(leaf);(void)leaf_s_dep;
char* leaf_t_dep = get_dependency<dep_t::P_t>(leaf);(void)leaf_t_dep;
while((it != leaf->U.end()) && (i < max_task_size)) {
task_deps[i] = (*it)->getParticleContainer();
if((*it) == leaf || (*it)->getParticleContainer()->size() == 0) {
++it;
continue;
}
task_deps[i] = get_dependency<dep_t::P_t>(*it);
++i;
++it;
}
while(i < max_task_size) {
task_deps[i] = this->mock_dep.template next<container_t>();
task_deps[i] = this->mock_dep.next();
++i;
}
#pragma omp task \
#pragma omp task \
firstprivate(leaf, leaf_source_particle_container, leaf_target_particle_container, first, it, do_inner) \
depend(inout: \
leaf_source_particle_container[:1], \
leaf_target_particle_container[:1], \
leaf_s_dep[:1], \
leaf_t_dep[:1], \
task_deps[0][:1], \
task_deps[1][:1], \
task_deps[2][:1], \
......@@ -836,8 +864,8 @@ public:
task_deps[23][:1], \
task_deps[24][:1], \
task_deps[25][:1], \
task_deps[26][:1] \
)
task_deps[26][:1] \
)
{
const int thread_num = omp_get_thread_num();
// Vectors to be filled after sort and passed to kernel P2P
......@@ -872,7 +900,7 @@ public:
}
}
// #pragma omp taskwait
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment