From 15ea9a26cc07b254ade9c91c217a3786b55709c3 Mon Sep 17 00:00:00 2001 From: Quentin Khan <quentin.khan@inria.fr> Date: Mon, 4 Jul 2016 15:02:26 +0200 Subject: [PATCH] FAdaptiveTask: reformat to decorelate task creation from tree traversal --- Src/Adaptive/new/FAdaptiveTask.hpp | 1258 +++++++++++++++------------- 1 file changed, 665 insertions(+), 593 deletions(-) diff --git a/Src/Adaptive/new/FAdaptiveTask.hpp b/Src/Adaptive/new/FAdaptiveTask.hpp index 6a0830d88..4e8e943ce 100644 --- a/Src/Adaptive/new/FAdaptiveTask.hpp +++ b/Src/Adaptive/new/FAdaptiveTask.hpp @@ -135,6 +135,7 @@ protected: } else if(I == dep_t::P_t) { return (const char*) node->getParticleContainer(); } + return nullptr; } @@ -305,10 +306,20 @@ public: } } - // P2M + /** \brief Walk through leaves and queue P2M tasks */ void source_to_up() { - for(node_t* leaf : _tree.leaves()) { + this->create_P2M_task(leaf); + } + // #pragma omp taskwait + } + + /** + * \brief Create and queue a P2M task + * + * \param leaf The P2M leaf + */ + void create_P2M_task(node_t* leaf) { const char* ps_dep = get_dependency<dep_t::P_s>(leaf);(void)ps_dep; const char* m_dep = get_dependency<dep_t::M>(leaf);(void)m_dep; @@ -319,460 +330,475 @@ public: const int thread_num = omp_get_thread_num(); _kernels[thread_num]->P2M(leaf->getData(), leaf->getParticleContainer()); } + } + + + /** \brief Walk through tree and queue M2M tasks */ + void up_to_up() { + for(node_t& n : _tree.post_order_walk()) { + if(! n.is_leaf()) { + create_M2M_task(&n); + } } // #pragma omp taskwait } - // M2M - void up_to_up() { + /** + * \brief Create and queue an M2M task + * + * \param node An internal node + * + * \warning node is assumed not to be a leaf. + */ + void create_M2M_task(node_t* node) { + assert(! 
node->is_leaf()); + // Setup task dependencies + // children data + const char* children_dep[node_t::child_count] = {}; + for(node_t* child : node->getChildren()) { + children_dep[child->getIndex() & (node_t::child_count-1)] + = get_dependency<dep_t::M>(child); + } + // node data + const char* parent_dep = get_dependency<dep_t::M>(node); (void) parent_dep; + + #pragma omp task \ + depend(in: \ + children_dep[0][:1], \ + children_dep[1][:1], \ + children_dep[2][:1], \ + children_dep[3][:1], \ + children_dep[4][:1], \ + children_dep[5][:1], \ + children_dep[6][:1], \ + children_dep[7][:1]) \ + depend(out: parent_dep[:1]) + { + const int thread_num = omp_get_thread_num(); - for(node_t& n : _tree.post_order_walk()) { - node_t* node = &n; - if(! node->is_leaf()) { - - // Setup task dependencies - // children data - const char* children_dep[node_t::child_count] = {}; - for(node_t* child : node->getChildren()) { - children_dep[child->getIndex() & (node_t::child_count-1)] - = get_dependency<dep_t::M>(child); - } - // node data - const char* parent_dep = get_dependency<dep_t::M>(node); (void) parent_dep; - - #pragma omp task \ - depend(in: \ - children_dep[0][:1], \ - children_dep[1][:1], \ - children_dep[2][:1], \ - children_dep[3][:1], \ - children_dep[4][:1], \ - children_dep[5][:1], \ - children_dep[6][:1], \ - children_dep[7][:1]) \ - depend(out: parent_dep[:1]) - { - const int thread_num = omp_get_thread_num(); - - // Fill array of children data - std::array<typename node_t::data_t*, node_t::child_count> child_data; - for(node_t* child : node->getChildren()) { - /// Last bits of child index give the index relative to parent - child_data[child->getIndex() & (node_t::child_count-1)] = child->getData(); - } + // Fill array of children data + std::array<typename node_t::data_t*, node_t::child_count> child_data; + for(node_t* child : node->getChildren()) { + // Last bits of child index give the index relative to parent + child_data[child->getIndex() & 
(node_t::child_count-1)] = child->getData(); + } - // Call kernel module - this->_kernels[thread_num]->M2M(node->getData(), child_data.data(), - static_cast<int>(node->getDepth())); - } + // Call kernel module + this->_kernels[thread_num]->M2M(node->getData(), child_data.data(), + static_cast<int>(node->getDepth())); + } + } + + + + /** \brief Walk through tree and queue M2L tasks */ + void v_list_step() { + for(node_t& n : _tree.in_order_walk()) { + if(n.is_leaf() && n.getParticleContainer()->size() == 0) { + continue; } + create_M2L_task(&n); } // #pragma omp taskwait } - // M2L - void v_list_step() { - for(node_t& n : _tree.in_order_walk()) { - node_t* node = &n; - if(node->is_leaf() && node->getParticleContainer()->size() == 0) { + /** + * \brief Create and queue an M2L task + * + * \param node A tree node + */ + void create_M2L_task(node_t* node) { + + // Generate task dependencies + // There cannot be more than 7^Dim cells involved in a M2L, in 3D, this is 343 + const char* task_deps[343]; + const char* data_dep = get_dependency<dep_t::L>(node);(void) data_dep; + std::size_t idx_dep = 0; + // Add existing dependencies + for(node_t* v_item : node->V) { + if(v_item->is_leaf() + && v_item->getParticleContainer()->size() == 0) { continue; } + task_deps[idx_dep] = get_dependency<dep_t::M>(v_item); + ++idx_dep; + } + // Add mock dependencies, these are generated on the fly and used + // only once, that way they can never stop a task from starting + while(idx_dep < 343) { + task_deps[idx_dep] = this->mock_dep.next(); + ++idx_dep; + } + + #pragma omp task \ + depend(in: \ + task_deps[0][:1], \ + task_deps[1][:1], \ + task_deps[2][:1], \ + task_deps[3][:1], \ + task_deps[4][:1], \ + task_deps[5][:1], \ + task_deps[6][:1], \ + task_deps[7][:1], \ + task_deps[8][:1], \ + task_deps[9][:1], \ + task_deps[10][:1], \ + task_deps[11][:1], \ + task_deps[12][:1], \ + task_deps[13][:1], \ + task_deps[14][:1], \ + task_deps[15][:1], \ + task_deps[16][:1], \ + task_deps[17][:1], 
\ + task_deps[18][:1], \ + task_deps[19][:1], \ + task_deps[20][:1], \ + task_deps[21][:1], \ + task_deps[22][:1], \ + task_deps[23][:1], \ + task_deps[24][:1], \ + task_deps[25][:1], \ + task_deps[26][:1], \ + task_deps[27][:1], \ + task_deps[28][:1], \ + task_deps[29][:1], \ + task_deps[30][:1], \ + task_deps[31][:1], \ + task_deps[32][:1], \ + task_deps[33][:1], \ + task_deps[34][:1], \ + task_deps[35][:1], \ + task_deps[36][:1], \ + task_deps[37][:1], \ + task_deps[38][:1], \ + task_deps[39][:1], \ + task_deps[40][:1], \ + task_deps[41][:1], \ + task_deps[42][:1], \ + task_deps[43][:1], \ + task_deps[44][:1], \ + task_deps[45][:1], \ + task_deps[46][:1], \ + task_deps[47][:1], \ + task_deps[48][:1], \ + task_deps[49][:1], \ + task_deps[50][:1], \ + task_deps[51][:1], \ + task_deps[52][:1], \ + task_deps[53][:1], \ + task_deps[54][:1], \ + task_deps[55][:1], \ + task_deps[56][:1], \ + task_deps[57][:1], \ + task_deps[58][:1], \ + task_deps[59][:1], \ + task_deps[60][:1], \ + task_deps[61][:1], \ + task_deps[62][:1], \ + task_deps[63][:1], \ + task_deps[64][:1], \ + task_deps[65][:1], \ + task_deps[66][:1], \ + task_deps[67][:1], \ + task_deps[68][:1], \ + task_deps[69][:1], \ + task_deps[70][:1], \ + task_deps[71][:1], \ + task_deps[72][:1], \ + task_deps[73][:1], \ + task_deps[74][:1], \ + task_deps[75][:1], \ + task_deps[76][:1], \ + task_deps[77][:1], \ + task_deps[78][:1], \ + task_deps[79][:1], \ + task_deps[80][:1], \ + task_deps[81][:1], \ + task_deps[82][:1], \ + task_deps[83][:1], \ + task_deps[84][:1], \ + task_deps[85][:1], \ + task_deps[86][:1], \ + task_deps[87][:1], \ + task_deps[88][:1], \ + task_deps[89][:1], \ + task_deps[90][:1], \ + task_deps[91][:1], \ + task_deps[92][:1], \ + task_deps[93][:1], \ + task_deps[94][:1], \ + task_deps[95][:1], \ + task_deps[96][:1], \ + task_deps[97][:1], \ + task_deps[98][:1], \ + task_deps[99][:1], \ + task_deps[100][:1], \ + task_deps[101][:1], \ + task_deps[102][:1], \ + task_deps[103][:1], \ + 
task_deps[104][:1], \ + task_deps[105][:1], \ + task_deps[106][:1], \ + task_deps[107][:1], \ + task_deps[108][:1], \ + task_deps[109][:1], \ + task_deps[110][:1], \ + task_deps[111][:1], \ + task_deps[112][:1], \ + task_deps[113][:1], \ + task_deps[114][:1], \ + task_deps[115][:1], \ + task_deps[116][:1], \ + task_deps[117][:1], \ + task_deps[118][:1], \ + task_deps[119][:1], \ + task_deps[120][:1], \ + task_deps[121][:1], \ + task_deps[122][:1], \ + task_deps[123][:1], \ + task_deps[124][:1], \ + task_deps[125][:1], \ + task_deps[126][:1], \ + task_deps[127][:1], \ + task_deps[128][:1], \ + task_deps[129][:1], \ + task_deps[130][:1], \ + task_deps[131][:1], \ + task_deps[132][:1], \ + task_deps[133][:1], \ + task_deps[134][:1], \ + task_deps[135][:1], \ + task_deps[136][:1], \ + task_deps[137][:1], \ + task_deps[138][:1], \ + task_deps[139][:1], \ + task_deps[140][:1], \ + task_deps[141][:1], \ + task_deps[142][:1], \ + task_deps[143][:1], \ + task_deps[144][:1], \ + task_deps[145][:1], \ + task_deps[146][:1], \ + task_deps[147][:1], \ + task_deps[148][:1], \ + task_deps[149][:1], \ + task_deps[150][:1], \ + task_deps[151][:1], \ + task_deps[152][:1], \ + task_deps[153][:1], \ + task_deps[154][:1], \ + task_deps[155][:1], \ + task_deps[156][:1], \ + task_deps[157][:1], \ + task_deps[158][:1], \ + task_deps[159][:1], \ + task_deps[160][:1], \ + task_deps[161][:1], \ + task_deps[162][:1], \ + task_deps[163][:1], \ + task_deps[164][:1], \ + task_deps[165][:1], \ + task_deps[166][:1], \ + task_deps[167][:1], \ + task_deps[168][:1], \ + task_deps[169][:1], \ + task_deps[170][:1], \ + task_deps[171][:1], \ + task_deps[172][:1], \ + task_deps[173][:1], \ + task_deps[174][:1], \ + task_deps[175][:1], \ + task_deps[176][:1], \ + task_deps[177][:1], \ + task_deps[178][:1], \ + task_deps[179][:1], \ + task_deps[180][:1], \ + task_deps[181][:1], \ + task_deps[182][:1], \ + task_deps[183][:1], \ + task_deps[184][:1], \ + task_deps[185][:1], \ + task_deps[186][:1], \ + 
task_deps[187][:1], \ + task_deps[188][:1], \ + task_deps[189][:1], \ + task_deps[190][:1], \ + task_deps[191][:1], \ + task_deps[192][:1], \ + task_deps[193][:1], \ + task_deps[194][:1], \ + task_deps[195][:1], \ + task_deps[196][:1], \ + task_deps[197][:1], \ + task_deps[198][:1], \ + task_deps[199][:1], \ + task_deps[200][:1], \ + task_deps[201][:1], \ + task_deps[202][:1], \ + task_deps[203][:1], \ + task_deps[204][:1], \ + task_deps[205][:1], \ + task_deps[206][:1], \ + task_deps[207][:1], \ + task_deps[208][:1], \ + task_deps[209][:1], \ + task_deps[210][:1], \ + task_deps[211][:1], \ + task_deps[212][:1], \ + task_deps[213][:1], \ + task_deps[214][:1], \ + task_deps[215][:1], \ + task_deps[216][:1], \ + task_deps[217][:1], \ + task_deps[218][:1], \ + task_deps[219][:1], \ + task_deps[220][:1], \ + task_deps[221][:1], \ + task_deps[222][:1], \ + task_deps[223][:1], \ + task_deps[224][:1], \ + task_deps[225][:1], \ + task_deps[226][:1], \ + task_deps[227][:1], \ + task_deps[228][:1], \ + task_deps[229][:1], \ + task_deps[230][:1], \ + task_deps[231][:1], \ + task_deps[232][:1], \ + task_deps[233][:1], \ + task_deps[234][:1], \ + task_deps[235][:1], \ + task_deps[236][:1], \ + task_deps[237][:1], \ + task_deps[238][:1], \ + task_deps[239][:1], \ + task_deps[240][:1], \ + task_deps[241][:1], \ + task_deps[242][:1], \ + task_deps[243][:1], \ + task_deps[244][:1], \ + task_deps[245][:1], \ + task_deps[246][:1], \ + task_deps[247][:1], \ + task_deps[248][:1], \ + task_deps[249][:1], \ + task_deps[250][:1], \ + task_deps[251][:1], \ + task_deps[252][:1], \ + task_deps[253][:1], \ + task_deps[254][:1], \ + task_deps[255][:1], \ + task_deps[256][:1], \ + task_deps[257][:1], \ + task_deps[258][:1], \ + task_deps[259][:1], \ + task_deps[260][:1], \ + task_deps[261][:1], \ + task_deps[262][:1], \ + task_deps[263][:1], \ + task_deps[264][:1], \ + task_deps[265][:1], \ + task_deps[266][:1], \ + task_deps[267][:1], \ + task_deps[268][:1], \ + task_deps[269][:1], \ + 
task_deps[270][:1], \ + task_deps[271][:1], \ + task_deps[272][:1], \ + task_deps[273][:1], \ + task_deps[274][:1], \ + task_deps[275][:1], \ + task_deps[276][:1], \ + task_deps[277][:1], \ + task_deps[278][:1], \ + task_deps[279][:1], \ + task_deps[280][:1], \ + task_deps[281][:1], \ + task_deps[282][:1], \ + task_deps[283][:1], \ + task_deps[284][:1], \ + task_deps[285][:1], \ + task_deps[286][:1], \ + task_deps[287][:1], \ + task_deps[288][:1], \ + task_deps[289][:1], \ + task_deps[290][:1], \ + task_deps[291][:1], \ + task_deps[292][:1], \ + task_deps[293][:1], \ + task_deps[294][:1], \ + task_deps[295][:1], \ + task_deps[296][:1], \ + task_deps[297][:1], \ + task_deps[298][:1], \ + task_deps[299][:1], \ + task_deps[300][:1], \ + task_deps[301][:1], \ + task_deps[302][:1], \ + task_deps[303][:1], \ + task_deps[304][:1], \ + task_deps[305][:1], \ + task_deps[306][:1], \ + task_deps[307][:1], \ + task_deps[308][:1], \ + task_deps[309][:1], \ + task_deps[310][:1], \ + task_deps[311][:1], \ + task_deps[312][:1], \ + task_deps[313][:1], \ + task_deps[314][:1], \ + task_deps[315][:1], \ + task_deps[316][:1], \ + task_deps[317][:1], \ + task_deps[318][:1], \ + task_deps[319][:1], \ + task_deps[320][:1], \ + task_deps[321][:1], \ + task_deps[322][:1], \ + task_deps[323][:1], \ + task_deps[324][:1], \ + task_deps[325][:1], \ + task_deps[326][:1], \ + task_deps[327][:1], \ + task_deps[328][:1], \ + task_deps[329][:1], \ + task_deps[330][:1], \ + task_deps[331][:1], \ + task_deps[332][:1], \ + task_deps[333][:1], \ + task_deps[334][:1], \ + task_deps[335][:1], \ + task_deps[336][:1], \ + task_deps[337][:1], \ + task_deps[338][:1], \ + task_deps[339][:1], \ + task_deps[340][:1], \ + task_deps[341][:1], \ + task_deps[342][:1] \ + ) depend(inout: data_dep[:1]) firstprivate(node) + { - // Generate task dependencies + const int thread_num = omp_get_thread_num(); - // The array of dependencies, we know that there cannot be more than - // 7^Dim cells involved in a M2L, in 3D, 
this is 343 - const char* task_deps[343]; - const char* data_dep = get_dependency<dep_t::L>(node);(void) data_dep; - std::size_t idx_dep = 0; - // Add existing dependencies + std::vector<decltype(std::declval<const node_t>().getData())> v_item_data_list; + std::vector<int> v_item_indices; + // Needed to compute offset between boxes for(node_t* v_item : node->V) { if(v_item->is_leaf() && v_item->getParticleContainer()->size() == 0) { continue; } - task_deps[idx_dep] = get_dependency<dep_t::M>(v_item); - ++idx_dep; - } - // Add mock dependencies, these are generated on the fly and used - // only once, that way they can never stop a task from starting - while(idx_dep < 343) { - task_deps[idx_dep] = this->mock_dep.next(); - ++idx_dep; - } - - #pragma omp task \ - depend(in: \ - task_deps[0][:1], \ - task_deps[1][:1], \ - task_deps[2][:1], \ - task_deps[3][:1], \ - task_deps[4][:1], \ - task_deps[5][:1], \ - task_deps[6][:1], \ - task_deps[7][:1], \ - task_deps[8][:1], \ - task_deps[9][:1], \ - task_deps[10][:1], \ - task_deps[11][:1], \ - task_deps[12][:1], \ - task_deps[13][:1], \ - task_deps[14][:1], \ - task_deps[15][:1], \ - task_deps[16][:1], \ - task_deps[17][:1], \ - task_deps[18][:1], \ - task_deps[19][:1], \ - task_deps[20][:1], \ - task_deps[21][:1], \ - task_deps[22][:1], \ - task_deps[23][:1], \ - task_deps[24][:1], \ - task_deps[25][:1], \ - task_deps[26][:1], \ - task_deps[27][:1], \ - task_deps[28][:1], \ - task_deps[29][:1], \ - task_deps[30][:1], \ - task_deps[31][:1], \ - task_deps[32][:1], \ - task_deps[33][:1], \ - task_deps[34][:1], \ - task_deps[35][:1], \ - task_deps[36][:1], \ - task_deps[37][:1], \ - task_deps[38][:1], \ - task_deps[39][:1], \ - task_deps[40][:1], \ - task_deps[41][:1], \ - task_deps[42][:1], \ - task_deps[43][:1], \ - task_deps[44][:1], \ - task_deps[45][:1], \ - task_deps[46][:1], \ - task_deps[47][:1], \ - task_deps[48][:1], \ - task_deps[49][:1], \ - task_deps[50][:1], \ - task_deps[51][:1], \ - task_deps[52][:1], \ - 
task_deps[53][:1], \ - task_deps[54][:1], \ - task_deps[55][:1], \ - task_deps[56][:1], \ - task_deps[57][:1], \ - task_deps[58][:1], \ - task_deps[59][:1], \ - task_deps[60][:1], \ - task_deps[61][:1], \ - task_deps[62][:1], \ - task_deps[63][:1], \ - task_deps[64][:1], \ - task_deps[65][:1], \ - task_deps[66][:1], \ - task_deps[67][:1], \ - task_deps[68][:1], \ - task_deps[69][:1], \ - task_deps[70][:1], \ - task_deps[71][:1], \ - task_deps[72][:1], \ - task_deps[73][:1], \ - task_deps[74][:1], \ - task_deps[75][:1], \ - task_deps[76][:1], \ - task_deps[77][:1], \ - task_deps[78][:1], \ - task_deps[79][:1], \ - task_deps[80][:1], \ - task_deps[81][:1], \ - task_deps[82][:1], \ - task_deps[83][:1], \ - task_deps[84][:1], \ - task_deps[85][:1], \ - task_deps[86][:1], \ - task_deps[87][:1], \ - task_deps[88][:1], \ - task_deps[89][:1], \ - task_deps[90][:1], \ - task_deps[91][:1], \ - task_deps[92][:1], \ - task_deps[93][:1], \ - task_deps[94][:1], \ - task_deps[95][:1], \ - task_deps[96][:1], \ - task_deps[97][:1], \ - task_deps[98][:1], \ - task_deps[99][:1], \ - task_deps[100][:1], \ - task_deps[101][:1], \ - task_deps[102][:1], \ - task_deps[103][:1], \ - task_deps[104][:1], \ - task_deps[105][:1], \ - task_deps[106][:1], \ - task_deps[107][:1], \ - task_deps[108][:1], \ - task_deps[109][:1], \ - task_deps[110][:1], \ - task_deps[111][:1], \ - task_deps[112][:1], \ - task_deps[113][:1], \ - task_deps[114][:1], \ - task_deps[115][:1], \ - task_deps[116][:1], \ - task_deps[117][:1], \ - task_deps[118][:1], \ - task_deps[119][:1], \ - task_deps[120][:1], \ - task_deps[121][:1], \ - task_deps[122][:1], \ - task_deps[123][:1], \ - task_deps[124][:1], \ - task_deps[125][:1], \ - task_deps[126][:1], \ - task_deps[127][:1], \ - task_deps[128][:1], \ - task_deps[129][:1], \ - task_deps[130][:1], \ - task_deps[131][:1], \ - task_deps[132][:1], \ - task_deps[133][:1], \ - task_deps[134][:1], \ - task_deps[135][:1], \ - task_deps[136][:1], \ - task_deps[137][:1], \ - 
task_deps[138][:1], \ - task_deps[139][:1], \ - task_deps[140][:1], \ - task_deps[141][:1], \ - task_deps[142][:1], \ - task_deps[143][:1], \ - task_deps[144][:1], \ - task_deps[145][:1], \ - task_deps[146][:1], \ - task_deps[147][:1], \ - task_deps[148][:1], \ - task_deps[149][:1], \ - task_deps[150][:1], \ - task_deps[151][:1], \ - task_deps[152][:1], \ - task_deps[153][:1], \ - task_deps[154][:1], \ - task_deps[155][:1], \ - task_deps[156][:1], \ - task_deps[157][:1], \ - task_deps[158][:1], \ - task_deps[159][:1], \ - task_deps[160][:1], \ - task_deps[161][:1], \ - task_deps[162][:1], \ - task_deps[163][:1], \ - task_deps[164][:1], \ - task_deps[165][:1], \ - task_deps[166][:1], \ - task_deps[167][:1], \ - task_deps[168][:1], \ - task_deps[169][:1], \ - task_deps[170][:1], \ - task_deps[171][:1], \ - task_deps[172][:1], \ - task_deps[173][:1], \ - task_deps[174][:1], \ - task_deps[175][:1], \ - task_deps[176][:1], \ - task_deps[177][:1], \ - task_deps[178][:1], \ - task_deps[179][:1], \ - task_deps[180][:1], \ - task_deps[181][:1], \ - task_deps[182][:1], \ - task_deps[183][:1], \ - task_deps[184][:1], \ - task_deps[185][:1], \ - task_deps[186][:1], \ - task_deps[187][:1], \ - task_deps[188][:1], \ - task_deps[189][:1], \ - task_deps[190][:1], \ - task_deps[191][:1], \ - task_deps[192][:1], \ - task_deps[193][:1], \ - task_deps[194][:1], \ - task_deps[195][:1], \ - task_deps[196][:1], \ - task_deps[197][:1], \ - task_deps[198][:1], \ - task_deps[199][:1], \ - task_deps[200][:1], \ - task_deps[201][:1], \ - task_deps[202][:1], \ - task_deps[203][:1], \ - task_deps[204][:1], \ - task_deps[205][:1], \ - task_deps[206][:1], \ - task_deps[207][:1], \ - task_deps[208][:1], \ - task_deps[209][:1], \ - task_deps[210][:1], \ - task_deps[211][:1], \ - task_deps[212][:1], \ - task_deps[213][:1], \ - task_deps[214][:1], \ - task_deps[215][:1], \ - task_deps[216][:1], \ - task_deps[217][:1], \ - task_deps[218][:1], \ - task_deps[219][:1], \ - task_deps[220][:1], \ - 
task_deps[221][:1], \ - task_deps[222][:1], \ - task_deps[223][:1], \ - task_deps[224][:1], \ - task_deps[225][:1], \ - task_deps[226][:1], \ - task_deps[227][:1], \ - task_deps[228][:1], \ - task_deps[229][:1], \ - task_deps[230][:1], \ - task_deps[231][:1], \ - task_deps[232][:1], \ - task_deps[233][:1], \ - task_deps[234][:1], \ - task_deps[235][:1], \ - task_deps[236][:1], \ - task_deps[237][:1], \ - task_deps[238][:1], \ - task_deps[239][:1], \ - task_deps[240][:1], \ - task_deps[241][:1], \ - task_deps[242][:1], \ - task_deps[243][:1], \ - task_deps[244][:1], \ - task_deps[245][:1], \ - task_deps[246][:1], \ - task_deps[247][:1], \ - task_deps[248][:1], \ - task_deps[249][:1], \ - task_deps[250][:1], \ - task_deps[251][:1], \ - task_deps[252][:1], \ - task_deps[253][:1], \ - task_deps[254][:1], \ - task_deps[255][:1], \ - task_deps[256][:1], \ - task_deps[257][:1], \ - task_deps[258][:1], \ - task_deps[259][:1], \ - task_deps[260][:1], \ - task_deps[261][:1], \ - task_deps[262][:1], \ - task_deps[263][:1], \ - task_deps[264][:1], \ - task_deps[265][:1], \ - task_deps[266][:1], \ - task_deps[267][:1], \ - task_deps[268][:1], \ - task_deps[269][:1], \ - task_deps[270][:1], \ - task_deps[271][:1], \ - task_deps[272][:1], \ - task_deps[273][:1], \ - task_deps[274][:1], \ - task_deps[275][:1], \ - task_deps[276][:1], \ - task_deps[277][:1], \ - task_deps[278][:1], \ - task_deps[279][:1], \ - task_deps[280][:1], \ - task_deps[281][:1], \ - task_deps[282][:1], \ - task_deps[283][:1], \ - task_deps[284][:1], \ - task_deps[285][:1], \ - task_deps[286][:1], \ - task_deps[287][:1], \ - task_deps[288][:1], \ - task_deps[289][:1], \ - task_deps[290][:1], \ - task_deps[291][:1], \ - task_deps[292][:1], \ - task_deps[293][:1], \ - task_deps[294][:1], \ - task_deps[295][:1], \ - task_deps[296][:1], \ - task_deps[297][:1], \ - task_deps[298][:1], \ - task_deps[299][:1], \ - task_deps[300][:1], \ - task_deps[301][:1], \ - task_deps[302][:1], \ - task_deps[303][:1], \ - 
task_deps[304][:1], \ - task_deps[305][:1], \ - task_deps[306][:1], \ - task_deps[307][:1], \ - task_deps[308][:1], \ - task_deps[309][:1], \ - task_deps[310][:1], \ - task_deps[311][:1], \ - task_deps[312][:1], \ - task_deps[313][:1], \ - task_deps[314][:1], \ - task_deps[315][:1], \ - task_deps[316][:1], \ - task_deps[317][:1], \ - task_deps[318][:1], \ - task_deps[319][:1], \ - task_deps[320][:1], \ - task_deps[321][:1], \ - task_deps[322][:1], \ - task_deps[323][:1], \ - task_deps[324][:1], \ - task_deps[325][:1], \ - task_deps[326][:1], \ - task_deps[327][:1], \ - task_deps[328][:1], \ - task_deps[329][:1], \ - task_deps[330][:1], \ - task_deps[331][:1], \ - task_deps[332][:1], \ - task_deps[333][:1], \ - task_deps[334][:1], \ - task_deps[335][:1], \ - task_deps[336][:1], \ - task_deps[337][:1], \ - task_deps[338][:1], \ - task_deps[339][:1], \ - task_deps[340][:1], \ - task_deps[341][:1], \ - task_deps[342][:1] \ - ) depend(inout: data_dep[:1]) firstprivate(node) - { - - const int thread_num = omp_get_thread_num(); - - std::vector<decltype(std::declval<const node_t>().getData())> v_item_data_list; - std::vector<int> v_item_indices; - // Needed to compute offset between boxes - for(node_t* v_item : node->V) { - if(v_item->is_leaf() - && v_item->getParticleContainer()->size() == 0) { - continue; - } - v_item_indices.push_back(compute_box_offset_index(node, v_item, 3)); - v_item_data_list.push_back(v_item->getData()); - } - - // Call kernel M2L operator - this->_kernels[thread_num]->M2L(node->getData(), v_item_data_list.data(), v_item_indices.data(), static_cast<int>(v_item_data_list.size()), static_cast<int>(node->getDepth())); + v_item_indices.push_back(compute_box_offset_index(node, v_item, 3)); + v_item_data_list.push_back(v_item->getData()); } + // Call kernel M2L operator + this->_kernels[thread_num]->M2L(node->getData(), v_item_data_list.data(), v_item_indices.data(), static_cast<int>(v_item_data_list.size()), static_cast<int>(node->getDepth())); } - // 
#pragma omp taskwait } - // P2L + /** \brief Walk through leaves and queue P2L tasks */ void x_list_step() { /* NOTE: the X list and W list are complementary: if A is in X(B) then B * is in W(A). @@ -781,101 +807,154 @@ public: */ for(node_t* leaf : _tree.leaves()) { if(leaf->getParticleContainer()->size() > 0) { - for(node_t* w_item : leaf->W) { - if(w_item->is_leaf() && w_item->getParticleContainer()->size() == 0) { - continue; - } + this->create_P2L_task(leaf); + } + } + // #pragma omp taskwait + } - const char* w_dep = get_dependency<dep_t::L>(w_item);(void)w_dep; - const char* ps_dep = get_dependency<dep_t::P_s>(leaf);(void)ps_dep; - #pragma omp task firstprivate(leaf, w_item) depend(in: ps_dep[:1]) depend(inout: w_dep[:1]) - { - const int thread_num = omp_get_thread_num(); - this->_kernels[thread_num]->P2L(w_item->getData(), leaf->getParticleContainer()); - } - } + /** + * \brief Create and queue a P2L task + * + * \param leaf A tree leaf + */ + void create_P2L_task(node_t* leaf) { + for(node_t* w_item : leaf->W) { + if(w_item->is_leaf() && w_item->getParticleContainer()->size() == 0) { + continue; + } + + const char* w_dep = get_dependency<dep_t::L>(w_item);(void)w_dep; + const char* ps_dep = get_dependency<dep_t::P_s>(leaf);(void)ps_dep; + + #pragma omp task firstprivate(leaf, w_item) depend(in: ps_dep[:1]) depend(inout: w_dep[:1]) + { + const int thread_num = omp_get_thread_num(); + this->_kernels[thread_num]->P2L(w_item->getData(), leaf->getParticleContainer()); } } - // #pragma omp taskwait } - // L2L + /** \brief Walk through tree and queue L2L tasks */ void down_to_down() { for(node_t& n : _tree.pre_order_walk()) { - node_t* node = &n; - if(! 
node->is_leaf()) { - const char* parent_dep = get_dependency<dep_t::L>(node);(void)parent_dep; - const char* children_dep[node_t::child_count]; - for(std::size_t i = 0; i < node_t::child_count; ++i) { - children_dep[i] = get_dependency<dep_t::L>(node->getChild(i)); - } - - #pragma omp task \ - depend(in: parent_dep[:1]) \ - depend(inout: \ - children_dep[0][:1], \ - children_dep[1][:1], \ - children_dep[2][:1], \ - children_dep[3][:1], \ - children_dep[4][:1], \ - children_dep[5][:1], \ - children_dep[6][:1], \ - children_dep[7][:1] \ - ) - { - const int thread_num = omp_get_thread_num(); - - typename node_t::data_t* child_data[node_t::child_count]; - for(std::size_t i = 0; i < node_t::child_count; ++i) { - child_data[i] = node->getChild(i)->getData(); - } - this->_kernels[thread_num]->L2L(node->getData(), child_data, static_cast<int>(node->getDepth())); - } + if(! n.is_leaf()) { + this->create_L2L_task(&n); } } // #pragma omp taskwait } - // M2P + + /** + * \brief Create and queue an L2L task + * + * \param node An internal tree node + * + * \warning node is assumed not to be a leaf + */ + void create_L2L_task(node_t* node) { + assert(! 
node->is_leaf()); + + const char* parent_dep = get_dependency<dep_t::L>(node);(void)parent_dep; + const char* children_dep[node_t::child_count]; + for(std::size_t i = 0; i < node_t::child_count; ++i) { + children_dep[i] = get_dependency<dep_t::L>(node->getChild(i)); + } + + #pragma omp task \ + depend(in: parent_dep[:1]) \ + depend(inout: \ + children_dep[0][:1], \ + children_dep[1][:1], \ + children_dep[2][:1], \ + children_dep[3][:1], \ + children_dep[4][:1], \ + children_dep[5][:1], \ + children_dep[6][:1], \ + children_dep[7][:1] \ + ) + { + const int thread_num = omp_get_thread_num(); + + typename node_t::data_t* child_data[node_t::child_count]; + for(std::size_t i = 0; i < node_t::child_count; ++i) { + child_data[i] = node->getChild(i)->getData(); + } + this->_kernels[thread_num]->L2L(node->getData(), child_data, static_cast<int>(node->getDepth())); + } + } + + + + /** \brief Walk through the leaves and queue M2P tasks */ void w_list_step() { for(node_t* leaf : _tree.leaves()) { if(leaf->getParticleContainer()->size() > 0) { - for(node_t* w_item : leaf->W) { - if(w_item->is_leaf() && w_item->getParticleContainer()->size() == 0) { - continue; - } + create_M2P_task(leaf); + } + } + // #pragma omp taskwait + } - const char* w_dep = get_dependency<dep_t::M>(w_item); (void)w_dep; - const char* pt_dep = get_dependency<dep_t::P_t>(leaf); (void)pt_dep; + /** + * \brief Create and queue an M2P task + * + * \param leaf A leaf + */ + void create_M2P_task(node_t* leaf) { + for(node_t* w_item : leaf->W) { + if(w_item->is_leaf() && w_item->getParticleContainer()->size() == 0) { + continue; + } - #pragma omp task depend(inout: pt_dep[:1]) depend(in: w_dep[:1]) - { - const int thread_num = omp_get_thread_num(); - this->_kernels[thread_num]->M2P(w_item->getData(), leaf->getParticleContainer()); - } - } + const char* w_dep = get_dependency<dep_t::M>(w_item); (void)w_dep; + const char* pt_dep = get_dependency<dep_t::P_t>(leaf); (void)pt_dep; + + #pragma omp task depend(inout: 
pt_dep[:1]) depend(in: w_dep[:1]) + { + const int thread_num = omp_get_thread_num(); + this->_kernels[thread_num]->M2P(w_item->getData(), leaf->getParticleContainer()); } } - // #pragma omp taskwait } - // L2P + + /** \brief Walk through the leaves and queue L2P tasks */ void down_to_target() { for(node_t* leaf : _tree.leaves()) { if(leaf->getParticleContainer()->size() != 0) { - const char* data_dep = get_dependency<dep_t::L>(leaf); (void)data_dep; - const char* pt_dep = get_dependency<dep_t::P_t>(leaf); (void)pt_dep; - #pragma omp task depend(inout: pt_dep[:1]) depend(in: data_dep[:1]) - { - const int thread_num = omp_get_thread_num(); - this->_kernels[thread_num]->L2P(leaf->getData(), leaf->getParticleContainer()); - } + this->create_L2P_task(leaf); } } // #pragma omp taskwait } + /** + * \brief Create and queue an L2P task + * + * \param leaf A leaf + */ + void create_L2P_task(node_t* leaf) { + const char* data_dep = get_dependency<dep_t::L>(leaf); (void)data_dep; + const char* pt_dep = get_dependency<dep_t::P_t>(leaf); (void)pt_dep; + #pragma omp task depend(inout: pt_dep[:1]) depend(in: data_dep[:1]) + { + const int thread_num = omp_get_thread_num(); + this->_kernels[thread_num]->L2P(leaf->getData(), leaf->getParticleContainer()); + } + } + + + /** \brief Walk through the leaves and queue P2P tasks */ + void u_list_step() { + for(node_t* leaf : _tree.leaves()) { + this->create_P2P_task(leaf); + } + // #pragma omp taskwait + } + /** \brief Direct computation step (P2P) * @@ -903,121 +982,114 @@ public: * given to the kernel P2P method. * */ - void u_list_step() { + void create_P2P_task(node_t* leaf) { using container_t = typename node_t::particle_container_t; - // The following containers are reused for each leaf to avoid repeated - // dynamic allocation. 
+ container_t* const leaf_source_particle_container = + leaf->getParticleContainer(); + container_t* const leaf_target_particle_container = + leaf->getParticleContainer(); - for(node_t* leaf : _tree.leaves()) { + // Skip empty leaves + if( leaf_source_particle_container->size() == 0 + && leaf_target_particle_container->size() == 0) { + return; + } - container_t* const leaf_source_particle_container = - leaf->getParticleContainer(); - container_t* const leaf_target_particle_container = - leaf->getParticleContainer(); + auto it = leaf->U.begin(); + bool do_inner = true; - // Skip empty leaves - if( leaf_source_particle_container->size() == 0 - && leaf_target_particle_container->size() == 0) { - continue; - } + while(it != leaf->U.end()) { + constexpr const std::size_t max_task_size = 27; + std::size_t i = 0; + auto first = it; - auto it = leaf->U.begin(); - bool do_inner = true; + const char* task_deps[max_task_size]; + const char* leaf_s_dep = get_dependency<dep_t::P_s>(leaf);(void)leaf_s_dep; + const char* leaf_t_dep = get_dependency<dep_t::P_t>(leaf);(void)leaf_t_dep; - while(it != leaf->U.end()) { - constexpr const std::size_t max_task_size = 27; - std::size_t i = 0; - auto first = it; + while((it != leaf->U.end()) && (i < max_task_size)) { + if((*it) == leaf || (*it)->getParticleContainer()->size() == 0) { + ++it; + continue; + } - const char* task_deps[max_task_size]; - const char* leaf_s_dep = get_dependency<dep_t::P_s>(leaf);(void)leaf_s_dep; - const char* leaf_t_dep = get_dependency<dep_t::P_t>(leaf);(void)leaf_t_dep; + task_deps[i] = get_dependency<dep_t::P_t>(*it); + ++i; + ++it; + } - while((it != leaf->U.end()) && (i < max_task_size)) { - if((*it) == leaf || (*it)->getParticleContainer()->size() == 0) { - ++it; - continue; - } + while(i < max_task_size) { + task_deps[i] = this->mock_dep.next(); + ++i; + } - task_deps[i] = get_dependency<dep_t::P_t>(*it); - ++i; - ++it; - } + #pragma omp task \ + firstprivate(leaf, leaf_source_particle_container, 
leaf_target_particle_container, first, it, do_inner) \ + depend(inout: \ + leaf_s_dep[:1], \ + leaf_t_dep[:1], \ + task_deps[0][:1], \ + task_deps[1][:1], \ + task_deps[2][:1], \ + task_deps[3][:1], \ + task_deps[4][:1], \ + task_deps[5][:1], \ + task_deps[6][:1], \ + task_deps[7][:1], \ + task_deps[8][:1], \ + task_deps[9][:1], \ + task_deps[10][:1], \ + task_deps[11][:1], \ + task_deps[12][:1], \ + task_deps[13][:1], \ + task_deps[14][:1], \ + task_deps[15][:1], \ + task_deps[16][:1], \ + task_deps[17][:1], \ + task_deps[18][:1], \ + task_deps[19][:1], \ + task_deps[20][:1], \ + task_deps[21][:1], \ + task_deps[22][:1], \ + task_deps[23][:1], \ + task_deps[24][:1], \ + task_deps[25][:1], \ + task_deps[26][:1] \ + ) + { + const int thread_num = omp_get_thread_num(); + // Vectors to be filled after sort and passed to kernel P2P + std::vector<container_t*> u_item_source_particle_containers; + std::vector<int> u_item_indices; - while(i < max_task_size) { - task_deps[i] = this->mock_dep.next(); - ++i; - } + auto last = it; - #pragma omp task \ - firstprivate(leaf, leaf_source_particle_container, leaf_target_particle_container, first, it, do_inner) \ - depend(inout: \ - leaf_s_dep[:1], \ - leaf_t_dep[:1], \ - task_deps[0][:1], \ - task_deps[1][:1], \ - task_deps[2][:1], \ - task_deps[3][:1], \ - task_deps[4][:1], \ - task_deps[5][:1], \ - task_deps[6][:1], \ - task_deps[7][:1], \ - task_deps[8][:1], \ - task_deps[9][:1], \ - task_deps[10][:1], \ - task_deps[11][:1], \ - task_deps[12][:1], \ - task_deps[13][:1], \ - task_deps[14][:1], \ - task_deps[15][:1], \ - task_deps[16][:1], \ - task_deps[17][:1], \ - task_deps[18][:1], \ - task_deps[19][:1], \ - task_deps[20][:1], \ - task_deps[21][:1], \ - task_deps[22][:1], \ - task_deps[23][:1], \ - task_deps[24][:1], \ - task_deps[25][:1], \ - task_deps[26][:1] \ - ) - { - const int thread_num = omp_get_thread_num(); - // Vectors to be filled after sort and passed to kernel P2P - std::vector<container_t*> 
u_item_source_particle_containers; - std::vector<int> u_item_indices; - - auto last = it; - - while(first != last) { - // Skip empty u_items - if((*first) == leaf || (*first)->getParticleContainer()->size() == 0) { - ++first; - continue; - } - u_item_source_particle_containers.push_back((*first)->getParticleContainer()); - u_item_indices.push_back(compute_box_offset_index(leaf, *first, 1)); + while(first != last) { + // Skip empty u_items + if((*first) == leaf || (*first)->getParticleContainer()->size() == 0) { ++first; + continue; } - // Call P2P on vectors data - this->_kernels[thread_num] - ->P2P( - FTreeCoordinate(MortonIndex(leaf->getIndex())), - leaf_target_particle_container, - leaf_source_particle_container, - u_item_source_particle_containers.data(), - u_item_indices.data(), - static_cast<int>(u_item_source_particle_containers.size()), - do_inner - ); + u_item_source_particle_containers.push_back((*first)->getParticleContainer()); + u_item_indices.push_back(compute_box_offset_index(leaf, *first, 1)); + ++first; } - do_inner = false; - + // Call P2P on vectors data + this->_kernels[thread_num] + ->P2P( + FTreeCoordinate(MortonIndex(leaf->getIndex())), + leaf_target_particle_container, + leaf_source_particle_container, + u_item_source_particle_containers.data(), + u_item_indices.data(), + static_cast<int>(u_item_source_particle_containers.size()), + do_inner + ); } + do_inner = false; + } - // #pragma omp taskwait } -- GitLab