Commit ff1a2ea1 authored by Quentin Khan's avatar Quentin Khan

FFmmAlgorithmThread: change to new kernel interface

parent a51adc22
......@@ -16,6 +16,8 @@
#ifndef FFMMALGORITHMTHREAD_HPP
#define FFMMALGORITHMTHREAD_HPP
#include <array>
#include <algorithm>
#include "../Utils/FAssert.hpp"
#include "../Utils/FLog.hpp"
......@@ -68,7 +70,7 @@ class FFmmAlgorithmThread : public FAbstractAlgorithm, public FAlgorithmTimers{
public:
/** Class constructor
*
*
* The constructor needs the octree and the kernels used for computation.
* \param inTree the octree to work on.
* \param inKernels the kernels to call.
......@@ -85,7 +87,7 @@ public:
FAssertLF(tree, "tree cannot be null");
FAssertLF(leafLevelSeparationCriteria < 3, "Separation criteria should be < 3");
FAssertLF(0 < userChunkSize, "Chunk size should be > 0");
this->kernels = new KernelClass*[MaxThreads];
#pragma omp parallel num_threads(MaxThreads)
{
......@@ -94,13 +96,13 @@ public:
this->kernels[omp_get_thread_num()] = new KernelClass(*inKernels);
}
}
FAbstractAlgorithm::setNbLevelsInTree(tree->getHeight());
FLOG(FLog::Controller << "FFmmAlgorithmThread (Max Thread " << omp_get_max_threads() << ")\n");
FLOG(FLog::Controller << "\t static schedule " << (userChunkSize == -1 ? "static" : (userChunkSize == 0 ? "N/p^2" : std::to_string(userChunkSize))) << ")\n");
}
/** Default destructor */
virtual ~FFmmAlgorithmThread(){
for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
......@@ -134,13 +136,13 @@ public:
} else {
return userChunkSize;
}
}
}
template <class NumType>
void setChunkSize(const NumType size) {
userChunkSize = size;
}
protected:
/**
* Runs the complete algorithm.
......@@ -216,7 +218,10 @@ protected:
for(int idxLeafs = 0 ; idxLeafs < leafs ; ++idxLeafs){
// We need the current cell that represent the leaf
// and the list of particles
myThreadkernels->P2M( iterArray[idxLeafs].getCurrentCell() , iterArray[idxLeafs].getCurrentListSrc());
myThreadkernels->P2M(
&(iterArray[idxLeafs].getCurrentCell()->getMultipoleData()),
iterArray[idxLeafs].getCurrentCell(),
iterArray[idxLeafs].getCurrentListSrc());
}
}
FLOG(computationCounter.tac() );
......@@ -260,6 +265,7 @@ protected:
octreeIterator = avoidGotoLeftIterator;// equal octreeIterator.moveUp(); octreeIterator.gotoLeft();
const int chunkSize = this->getChunkSize(numberOfCells);
(void)chunkSize; // Used in OpenMP for loop, silence warning
FLOG(computationCounter.tic());
#pragma omp parallel num_threads(MaxThreads)
......@@ -267,9 +273,17 @@ protected:
KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
#pragma omp for nowait schedule(dynamic, chunkSize)
for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){
// We need the current cell and the child
// child is an array (of 8 child) that may be null
myThreadkernels->M2M( iterArray[idxCell].getCurrentCell() , iterArray[idxCell].getCurrentChild(), idxLevel);
// We need the current cell and its children
using multipole_t = typename CellClass::multipole_t;
std::array<multipole_t*, 8> children_multipoles;
CellClass** children = iterArray[idxCell].getCurrentChildren();
std::transform(children, children+8, children_multipoles.begin(),
[](CellClass* c) {return (c == nullptr ? nullptr : &(c->getMultipoleData()));});
myThreadkernels->M2M(
&(iterArray[idxCell].getCurrentCell()->getMultipoleData()),
children_multipoles.data(),
idxLevel);
}
}
......@@ -327,6 +341,7 @@ protected:
octreeIterator = avoidGotoLeftIterator;
const int chunkSize = this->getChunkSize(numberOfCells);
(void) chunkSize; // Used in OpenMP for loop, silence warning
FLOG(computationCounter.tic());
#pragma omp parallel num_threads(MaxThreads)
......@@ -338,7 +353,22 @@ protected:
#pragma omp for schedule(dynamic, chunkSize) nowait
for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){
const int counter = tree->getInteractionNeighbors(neighbors, neighborPositions, iterArray[idxCell].getCurrentGlobalCoordinate(), idxLevel, separationCriteria);
if(counter) myThreadkernels->M2L( iterArray[idxCell].getCurrentCell() , neighbors, neighborPositions, counter, idxLevel);
if(counter) {
using multipole_t = typename CellClass::multipole_t;
std::array<const multipole_t*, 342> neighbor_multipoles;
std::transform(neighbors, neighbors+counter,
neighbor_multipoles.begin(),
[](const CellClass* c) {
return (c == nullptr ? nullptr
: &(c->getMultipoleData()));
});
myThreadkernels->M2L(
&(iterArray[idxCell].getCurrentCell()->getLocalExpansionData()),
neighbor_multipoles.data(),
neighborPositions,
counter, idxLevel);
}
}
myThreadkernels->finishedLevelM2L(idxLevel);
......@@ -384,14 +414,29 @@ protected:
avoidGotoLeftIterator.moveDown();
octreeIterator = avoidGotoLeftIterator;
FLOG(computationCounter.tic());
const int chunkSize = this->getChunkSize(numberOfCells);
(void) chunkSize; // Used in OpenMP for loop, silence warning
FLOG(computationCounter.tic());
#pragma omp parallel num_threads(MaxThreads)
{
KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
#pragma omp for nowait schedule(dynamic, chunkSize)
for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){
myThreadkernels->L2L( iterArray[idxCell].getCurrentCell() , iterArray[idxCell].getCurrentChild(), idxLevel);
using local_expansion_t = typename CellClass::local_expansion_t;
std::array<local_expansion_t*, 8> children_expansions;
CellClass** children = iterArray[idxCell].getCurrentChildren();
std::transform(children, children+8, children_expansions.begin(),
[](CellClass* c) {
return (c == nullptr ? nullptr
: &(c->getLocalExpansionData()));
});
myThreadkernels->L2L(
&(iterArray[idxCell].getCurrentCell()->getLocalExpansionData()),
children_expansions.data(),
idxLevel);
}
}
FLOG(computationCounter.tac());
......@@ -489,7 +534,10 @@ protected:
for(int idxLeafs = previous ; idxLeafs < endAtThisShape ; ++idxLeafs){
LeafData& currentIter = leafsDataArray[idxLeafs];
if(l2pEnabled){
myThreadkernels.L2P(currentIter.cell, currentIter.targets);
myThreadkernels.L2P(
&(currentIter.cell->getLocalExpansionData()),
currentIter.cell,
currentIter.targets);
}
if(p2pEnabled){
// need the current particles and neighbors particles
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment