Commit 24ced909 authored by BRAMAS Berenger's avatar BRAMAS Berenger

Build intervals in task

parent e47c9ae9
......@@ -203,191 +203,230 @@ protected:
}
// Allocate the working buffer
std::unique_ptr<WorkloadTemp[]> workloadBuffer(new WorkloadTemp[leafsNumber]);
std::unique_ptr<WorkloadTemp*[]> workloadBufferThread(new WorkloadTemp*[MaxThreads]);
memset(workloadBufferThread.get(), 0, MaxThreads*sizeof(WorkloadTemp*));
{ // Prepare P2M
/// FLOG(FLog::Controller << "[Balance] P2M:\n");
typename OctreeClass::Iterator octreeIterator(tree);
octreeIterator.gotoBottomLeft();
FSize idxLeaf = 0;
FSize totalWork = 0;
do{
// Keep track of tree iterator
workloadBuffer[idxLeaf].iterator = octreeIterator;
// Count the nb of particles as amount of work in the leaf
workloadBuffer[idxLeaf].amountOfWork = octreeIterator.getCurrentListSrc()->getNbParticles();
// Keep the total amount of work
totalWork += workloadBuffer[idxLeaf].amountOfWork;
++idxLeaf;
} while(octreeIterator.moveRight());
generateIntervalFromWorkload(&workloadP2M, totalWork, workloadBuffer.get(), idxLeaf);
}
#pragma omp parallel
{
#pragma omp single
{
#pragma omp task
{ // Prepare P2M
if(workloadBufferThread[omp_get_thread_num()] == nullptr){
workloadBufferThread[omp_get_thread_num()] = new WorkloadTemp[leafsNumber];
}
WorkloadTemp* workloadBuffer = workloadBufferThread[omp_get_thread_num()];
/// FLOG(FLog::Controller << "[Balance] P2M:\n");
typename OctreeClass::Iterator octreeIterator(tree);
octreeIterator.gotoBottomLeft();
FSize idxLeaf = 0;
FSize totalWork = 0;
do{
// Keep track of tree iterator
workloadBuffer[idxLeaf].iterator = octreeIterator;
// Count the nb of particles as amount of work in the leaf
workloadBuffer[idxLeaf].amountOfWork = octreeIterator.getCurrentListSrc()->getNbParticles();
// Keep the total amount of work
totalWork += workloadBuffer[idxLeaf].amountOfWork;
++idxLeaf;
} while(octreeIterator.moveRight());
generateIntervalFromWorkload(&workloadP2M, totalWork, workloadBuffer, idxLeaf);
}
{ // Prepare L2P
/// FLOG(FLog::Controller << "[Balance] L2P:\n");
typename OctreeClass::Iterator octreeIterator(tree);
octreeIterator.gotoBottomLeft();
FSize idxLeaf = 0;
FSize totalWork = 0;
do{
// Keep track of tree iterator
workloadBuffer[idxLeaf].iterator = octreeIterator;
// Count the nb of particles as amount of work in the leaf
workloadBuffer[idxLeaf].amountOfWork = octreeIterator.getCurrentListTargets()->getNbParticles();
// Keep the total amount of work
totalWork += workloadBuffer[idxLeaf].amountOfWork;
++idxLeaf;
} while(octreeIterator.moveRight());
#pragma omp task
{ // Prepare L2P
if(workloadBufferThread[omp_get_thread_num()] == nullptr){
workloadBufferThread[omp_get_thread_num()] = new WorkloadTemp[leafsNumber];
}
WorkloadTemp* workloadBuffer = workloadBufferThread[omp_get_thread_num()];
/// FLOG(FLog::Controller << "[Balance] L2P:\n");
typename OctreeClass::Iterator octreeIterator(tree);
octreeIterator.gotoBottomLeft();
FSize idxLeaf = 0;
FSize totalWork = 0;
do{
// Keep track of tree iterator
workloadBuffer[idxLeaf].iterator = octreeIterator;
// Count the nb of particles as amount of work in the leaf
workloadBuffer[idxLeaf].amountOfWork = octreeIterator.getCurrentListTargets()->getNbParticles();
// Keep the total amount of work
totalWork += workloadBuffer[idxLeaf].amountOfWork;
++idxLeaf;
} while(octreeIterator.moveRight());
generateIntervalFromWorkload(&workloadL2P, totalWork, workloadBuffer, idxLeaf);
}
generateIntervalFromWorkload(&workloadL2P, totalWork, workloadBuffer.get(), idxLeaf);
}
#pragma omp task
{// Do it for the M2L
if(workloadBufferThread[omp_get_thread_num()] == nullptr){
workloadBufferThread[omp_get_thread_num()] = new WorkloadTemp[leafsNumber];
}
WorkloadTemp* workloadBuffer = workloadBufferThread[omp_get_thread_num()];
/// FLOG(FLog::Controller << "[Balance] M2L:\n");
workloadM2L.resize(OctreeHeight);
typename OctreeClass::Iterator avoidGotoLeftIterator(tree);
avoidGotoLeftIterator.gotoBottomLeft();
const CellClass* neighbors[343];
for(int idxLevel = OctreeHeight-1 ; idxLevel >= 2 ; --idxLevel){
/// FLOG(FLog::Controller << "[Balance] \t level " << idxLevel << ":\n");
typename OctreeClass::Iterator octreeIterator(avoidGotoLeftIterator);
avoidGotoLeftIterator.moveUp();
FSize idxCell = 0;
FSize totalWork = 0;
do{
// Keep track of tree iterator
workloadBuffer[idxCell].iterator = octreeIterator;
// Count the nb of M2L for this cell
workloadBuffer[idxCell].amountOfWork = tree->getInteractionNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(), idxLevel, 1);
// Keep the total amount of work
totalWork += workloadBuffer[idxCell].amountOfWork;
++idxCell;
} while(octreeIterator.moveRight());
// Now split between thread
generateIntervalFromWorkload(&workloadM2L[idxLevel], totalWork, workloadBuffer, idxCell);
}
}
#pragma omp task
{// Do it for the M2M L2L
if(workloadBufferThread[omp_get_thread_num()] == nullptr){
workloadBufferThread[omp_get_thread_num()] = new WorkloadTemp[leafsNumber];
}
WorkloadTemp* workloadBuffer = workloadBufferThread[omp_get_thread_num()];
/// FLOG(FLog::Controller << "[Balance] M2M L2L:\n");
workloadM2M.resize(OctreeHeight);
workloadL2L.resize(OctreeHeight);
typename OctreeClass::Iterator avoidGotoLeftIterator(tree);
avoidGotoLeftIterator.gotoBottomLeft();
avoidGotoLeftIterator.moveUp();
for(int idxLevel = OctreeHeight-2 ; idxLevel >= 2 ; --idxLevel){
/// FLOG(FLog::Controller << "[Balance] \t level " << idxLevel << ":\n");
typename OctreeClass::Iterator octreeIterator(avoidGotoLeftIterator);
avoidGotoLeftIterator.moveUp();
FSize idxCell = 0;
FSize totalWork = 0;
do{
// Keep track of tree iterator
workloadBuffer[idxCell].iterator = octreeIterator;
// Count the nb of children of the current cell
workloadBuffer[idxCell].amountOfWork = 0;
CellClass** child = octreeIterator.getCurrentChild();
for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){
if(child[idxChild]) workloadBuffer[idxCell].amountOfWork += 1;
}
// Keep the total amount of work
totalWork += workloadBuffer[idxCell].amountOfWork;
++idxCell;
} while(octreeIterator.moveRight());
// Now split between thread
generateIntervalFromWorkload(&workloadM2M[idxLevel], totalWork, workloadBuffer, idxCell);
generateIntervalFromWorkload(&workloadL2L[idxLevel], totalWork, workloadBuffer, idxCell);
}
}
{// Do it for the M2L
/// FLOG(FLog::Controller << "[Balance] M2L:\n");
workloadM2L.resize(OctreeHeight);
typename OctreeClass::Iterator avoidGotoLeftIterator(tree);
avoidGotoLeftIterator.gotoBottomLeft();
const CellClass* neighbors[343];
for(int idxLevel = OctreeHeight-1 ; idxLevel >= 2 ; --idxLevel){
FLOG(FLog::Controller << "[Balance] \t level " << idxLevel << ":\n");
typename OctreeClass::Iterator octreeIterator(avoidGotoLeftIterator);
avoidGotoLeftIterator.moveUp();
FSize idxCell = 0;
FSize totalWork = 0;
do{
// Keep track of tree iterator
workloadBuffer[idxCell].iterator = octreeIterator;
// Count the nb of M2L for this cell
workloadBuffer[idxCell].amountOfWork = tree->getInteractionNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(), idxLevel, 1);
// Keep the total amount of work
totalWork += workloadBuffer[idxCell].amountOfWork;
++idxCell;
} while(octreeIterator.moveRight());
// Now split between thread
generateIntervalFromWorkload(&workloadM2L[idxLevel], totalWork, workloadBuffer.get(), idxCell);
}
}
{// Do it for the M2M L2L
/// FLOG(FLog::Controller << "[Balance] M2M L2L:\n");
workloadM2M.resize(OctreeHeight);
workloadL2L.resize(OctreeHeight);
typename OctreeClass::Iterator avoidGotoLeftIterator(tree);
avoidGotoLeftIterator.gotoBottomLeft();
avoidGotoLeftIterator.moveUp();
for(int idxLevel = OctreeHeight-2 ; idxLevel >= 2 ; --idxLevel){
FLOG(FLog::Controller << "[Balance] \t level " << idxLevel << ":\n");
typename OctreeClass::Iterator octreeIterator(avoidGotoLeftIterator);
avoidGotoLeftIterator.moveUp();
FSize idxCell = 0;
FSize totalWork = 0;
do{
// Keep track of tree iterator
workloadBuffer[idxCell].iterator = octreeIterator;
// Count the nb of children of the current cell
workloadBuffer[idxCell].amountOfWork = 0;
CellClass** child = octreeIterator.getCurrentChild();
for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){
if(child[idxChild]) workloadBuffer[idxCell].amountOfWork += 1;
#pragma omp task
{
if(workloadBufferThread[omp_get_thread_num()] == nullptr){
workloadBufferThread[omp_get_thread_num()] = new WorkloadTemp[leafsNumber];
}
WorkloadTemp* workloadBuffer = workloadBufferThread[omp_get_thread_num()];
// Prepare the P2P
const int LeafIndex = OctreeHeight - 1;
leafsDataArray.reset(new LeafData[leafsNumber]);
// We need the offset for each color
int startPosAtShape[SizeShape] = {0};
for(int idxShape = 1 ; idxShape < SizeShape ; ++idxShape){
startPosAtShape[idxShape] = startPosAtShape[idxShape-1] + shapeLeaves[idxShape-1];
}
// Keep the total amount of work
totalWork += workloadBuffer[idxCell].amountOfWork;
++idxCell;
} while(octreeIterator.moveRight());
// Now split between thread
generateIntervalFromWorkload(&workloadM2M[idxLevel], totalWork, workloadBuffer.get(), idxCell);
generateIntervalFromWorkload(&workloadL2L[idxLevel], totalWork, workloadBuffer.get(), idxCell);
}
}
{
// Prepare the P2P
const int LeafIndex = OctreeHeight - 1;
leafsDataArray.reset(new LeafData[leafsNumber]);
// We need the offset for each color
int startPosAtShape[SizeShape] = {0};
for(int idxShape = 1 ; idxShape < SizeShape ; ++idxShape){
startPosAtShape[idxShape] = startPosAtShape[idxShape-1] + shapeLeaves[idxShape-1];
}
// Prepare each color
typename OctreeClass::Iterator octreeIterator(tree);
octreeIterator.gotoBottomLeft();
// Prepare each color
typename OctreeClass::Iterator octreeIterator(tree);
octreeIterator.gotoBottomLeft();
FSize workPerShape[SizeShape] = {0};
FSize workPerShape[SizeShape] = {0};
// for each leafs
for(int idxLeaf = 0 ; idxLeaf < leafsNumber ; ++idxLeaf){
const FTreeCoordinate& coord = octreeIterator.getCurrentGlobalCoordinate();
const int shapePosition = (coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3);
const int positionToWork = startPosAtShape[shapePosition]++;
leafsDataArray[positionToWork].index = octreeIterator.getCurrentGlobalIndex();
leafsDataArray[positionToWork].coord = coord;
leafsDataArray[positionToWork].targets = octreeIterator.getCurrentListTargets();
leafsDataArray[positionToWork].sources = octreeIterator.getCurrentListSrc();
// For now the cost is simply based on the number of particles
const FSize nbPartInLeaf = octreeIterator.getCurrentListTargets()->getNbParticles();
workloadBuffer[positionToWork].amountOfWork = nbPartInLeaf*nbPartInLeaf;
ContainerClass* neighbors[27];
tree->getLeafsNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(), LeafIndex);
for(int idxNeigh = 0 ; idxNeigh < 27 ; ++idxNeigh){
if(neighbors[idxNeigh]){
workloadBuffer[positionToWork].amountOfWork +=
nbPartInLeaf * neighbors[idxNeigh]->getNbParticles();
}
}
// for each leafs
for(int idxLeaf = 0 ; idxLeaf < leafsNumber ; ++idxLeaf){
const FTreeCoordinate& coord = octreeIterator.getCurrentGlobalCoordinate();
const int shapePosition = (coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3);
workPerShape[shapePosition] += workloadBuffer[positionToWork].amountOfWork;
const int positionToWork = startPosAtShape[shapePosition]++;
octreeIterator.moveRight();
}
leafsDataArray[positionToWork].index = octreeIterator.getCurrentGlobalIndex();
leafsDataArray[positionToWork].coord = coord;
leafsDataArray[positionToWork].targets = octreeIterator.getCurrentListTargets();
leafsDataArray[positionToWork].sources = octreeIterator.getCurrentListSrc();
workloadP2P.resize(SizeShape);
int offsetShape = 0;
// For now the cost is simply based on the number of particles
const FSize nbPartInLeaf = octreeIterator.getCurrentListTargets()->getNbParticles();
workloadBuffer[positionToWork].amountOfWork = nbPartInLeaf*nbPartInLeaf;
ContainerClass* neighbors[27];
tree->getLeafsNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(), LeafIndex);
for(int idxNeigh = 0 ; idxNeigh < 27 ; ++idxNeigh){
if(neighbors[idxNeigh]){
workloadBuffer[positionToWork].amountOfWork +=
nbPartInLeaf * neighbors[idxNeigh]->getNbParticles();
}
}
workPerShape[shapePosition] += workloadBuffer[positionToWork].amountOfWork;
octreeIterator.moveRight();
}
workloadP2P.resize(SizeShape);
int offsetShape = 0;
for(int idxShape = 0 ; idxShape < SizeShape ; ++idxShape){
std::vector<std::pair<int,int>>* intervals = &workloadP2P[idxShape];
const int nbElements = shapeLeaves[idxShape];
const FSize totalWork = workPerShape[idxShape];
// Now split between thread
(*intervals).resize(MaxThreads);
// Ideally each thread will have this
const FSize idealWork = (totalWork/MaxThreads);
// Assign default value for first thread
int idxThread = 0;
(*intervals)[idxThread].first = offsetShape;
FSize assignWork = workloadBuffer[0].amountOfWork;
for(int idxElement = 1+offsetShape ; idxElement < nbElements+offsetShape ; ++idxElement){
if(FMath::Abs((idxThread+1)*idealWork - assignWork) <
FMath::Abs((idxThread+1)*idealWork - assignWork - workloadBuffer[idxElement].amountOfWork)
&& idxThread != MaxThreads-1){
(*intervals)[idxThread].second = idxElement;
/// FLOG(FLog::Controller << "[Balance] Shape " << idxShape << " Thread " << idxThread << " goes from "
/// << (*intervals)[idxThread].first << " to " << (*intervals)[idxThread].second << "\n");
idxThread += 1;
(*intervals)[idxThread].first = idxElement;
}
assignWork += workloadBuffer[idxElement].amountOfWork;
}
(*intervals)[idxThread].second = nbElements + offsetShape;
for(int idxShape = 0 ; idxShape < SizeShape ; ++idxShape){
std::vector<std::pair<int,int>>* intervals = &workloadP2P[idxShape];
const int nbElements = shapeLeaves[idxShape];
const FSize totalWork = workPerShape[idxShape];
// Now split between thread
(*intervals).resize(MaxThreads);
// Ideally each thread will have this
const FSize idealWork = (totalWork/MaxThreads);
// Assign default value for first thread
int idxThread = 0;
(*intervals)[idxThread].first = offsetShape;
FSize assignWork = workloadBuffer[0].amountOfWork;
for(int idxElement = 1+offsetShape ; idxElement < nbElements+offsetShape ; ++idxElement){
if(FMath::Abs((idxThread+1)*idealWork - assignWork) <
FMath::Abs((idxThread+1)*idealWork - assignWork - workloadBuffer[idxElement].amountOfWork)
&& idxThread != MaxThreads-1){
(*intervals)[idxThread].second = idxElement;
/// FLOG(FLog::Controller << "[Balance] Shape " << idxShape << " Thread " << idxThread << " goes from "
/// << (*intervals)[idxThread].first << " to " << (*intervals)[idxThread].second << "\n");
idxThread += 1;
(*intervals)[idxThread].first = idxElement;
offsetShape += nbElements;
}
assignWork += workloadBuffer[idxElement].amountOfWork;
}
(*intervals)[idxThread].second = nbElements + offsetShape;
}
/// FLOG(FLog::Controller << "[Balance] Shape " << idxShape << " Thread " << idxThread << " goes from "
/// << (*intervals)[idxThread].first << " to " << (*intervals)[idxThread].second << "\n");
#pragma omp barrier
}
offsetShape += nbElements;
}
for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
delete[] workloadBufferThread[idxThread];
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment