Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 24ced909 authored by BRAMAS Berenger
Browse files

Build intervals in task

parent e47c9ae9
No related branches found
No related tags found
No related merge requests found
...@@ -203,191 +203,230 @@ protected: ...@@ -203,191 +203,230 @@ protected:
} }
// Allocate the working buffer // Allocate the working buffer
std::unique_ptr<WorkloadTemp[]> workloadBuffer(new WorkloadTemp[leafsNumber]); std::unique_ptr<WorkloadTemp*[]> workloadBufferThread(new WorkloadTemp*[MaxThreads]);
memset(workloadBufferThread.get(), 0, MaxThreads*sizeof(WorkloadTemp*));
{ // Prepare P2M #pragma omp parallel
/// FLOG(FLog::Controller << "[Balance] P2M:\n"); {
typename OctreeClass::Iterator octreeIterator(tree); #pragma omp single
octreeIterator.gotoBottomLeft(); {
FSize idxLeaf = 0; #pragma omp task
FSize totalWork = 0; { // Prepare P2M
do{ if(workloadBufferThread[omp_get_thread_num()] == nullptr){
// Keep track of tree iterator workloadBufferThread[omp_get_thread_num()] = new WorkloadTemp[leafsNumber];
workloadBuffer[idxLeaf].iterator = octreeIterator; }
// Count the nb of particles as amount of work in the leaf WorkloadTemp* workloadBuffer = workloadBufferThread[omp_get_thread_num()];
workloadBuffer[idxLeaf].amountOfWork = octreeIterator.getCurrentListSrc()->getNbParticles();
// Keep the total amount of work /// FLOG(FLog::Controller << "[Balance] P2M:\n");
totalWork += workloadBuffer[idxLeaf].amountOfWork; typename OctreeClass::Iterator octreeIterator(tree);
++idxLeaf; octreeIterator.gotoBottomLeft();
} while(octreeIterator.moveRight()); FSize idxLeaf = 0;
FSize totalWork = 0;
generateIntervalFromWorkload(&workloadP2M, totalWork, workloadBuffer.get(), idxLeaf); do{
} // Keep track of tree iterator
workloadBuffer[idxLeaf].iterator = octreeIterator;
// Count the nb of particles as amount of work in the leaf
workloadBuffer[idxLeaf].amountOfWork = octreeIterator.getCurrentListSrc()->getNbParticles();
// Keep the total amount of work
totalWork += workloadBuffer[idxLeaf].amountOfWork;
++idxLeaf;
} while(octreeIterator.moveRight());
generateIntervalFromWorkload(&workloadP2M, totalWork, workloadBuffer, idxLeaf);
}
{ // Prepare L2P #pragma omp task
/// FLOG(FLog::Controller << "[Balance] L2P:\n"); { // Prepare L2P
typename OctreeClass::Iterator octreeIterator(tree); if(workloadBufferThread[omp_get_thread_num()] == nullptr){
octreeIterator.gotoBottomLeft(); workloadBufferThread[omp_get_thread_num()] = new WorkloadTemp[leafsNumber];
FSize idxLeaf = 0; }
FSize totalWork = 0; WorkloadTemp* workloadBuffer = workloadBufferThread[omp_get_thread_num()];
do{ /// FLOG(FLog::Controller << "[Balance] L2P:\n");
// Keep track of tree iterator typename OctreeClass::Iterator octreeIterator(tree);
workloadBuffer[idxLeaf].iterator = octreeIterator; octreeIterator.gotoBottomLeft();
// Count the nb of particles as amount of work in the leaf FSize idxLeaf = 0;
workloadBuffer[idxLeaf].amountOfWork = octreeIterator.getCurrentListTargets()->getNbParticles(); FSize totalWork = 0;
// Keep the total amount of work do{
totalWork += workloadBuffer[idxLeaf].amountOfWork; // Keep track of tree iterator
++idxLeaf; workloadBuffer[idxLeaf].iterator = octreeIterator;
} while(octreeIterator.moveRight()); // Count the nb of particles as amount of work in the leaf
workloadBuffer[idxLeaf].amountOfWork = octreeIterator.getCurrentListTargets()->getNbParticles();
// Keep the total amount of work
totalWork += workloadBuffer[idxLeaf].amountOfWork;
++idxLeaf;
} while(octreeIterator.moveRight());
generateIntervalFromWorkload(&workloadL2P, totalWork, workloadBuffer, idxLeaf);
}
generateIntervalFromWorkload(&workloadL2P, totalWork, workloadBuffer.get(), idxLeaf); #pragma omp task
} {// Do it for the M2L
if(workloadBufferThread[omp_get_thread_num()] == nullptr){
workloadBufferThread[omp_get_thread_num()] = new WorkloadTemp[leafsNumber];
}
WorkloadTemp* workloadBuffer = workloadBufferThread[omp_get_thread_num()];
/// FLOG(FLog::Controller << "[Balance] M2L:\n");
workloadM2L.resize(OctreeHeight);
typename OctreeClass::Iterator avoidGotoLeftIterator(tree);
avoidGotoLeftIterator.gotoBottomLeft();
const CellClass* neighbors[343];
for(int idxLevel = OctreeHeight-1 ; idxLevel >= 2 ; --idxLevel){
/// FLOG(FLog::Controller << "[Balance] \t level " << idxLevel << ":\n");
typename OctreeClass::Iterator octreeIterator(avoidGotoLeftIterator);
avoidGotoLeftIterator.moveUp();
FSize idxCell = 0;
FSize totalWork = 0;
do{
// Keep track of tree iterator
workloadBuffer[idxCell].iterator = octreeIterator;
// Count the nb of M2L for this cell
workloadBuffer[idxCell].amountOfWork = tree->getInteractionNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(), idxLevel, 1);
// Keep the total amount of work
totalWork += workloadBuffer[idxCell].amountOfWork;
++idxCell;
} while(octreeIterator.moveRight());
// Now split between thread
generateIntervalFromWorkload(&workloadM2L[idxLevel], totalWork, workloadBuffer, idxCell);
}
}
#pragma omp task
{// Do it for the M2M L2L
if(workloadBufferThread[omp_get_thread_num()] == nullptr){
workloadBufferThread[omp_get_thread_num()] = new WorkloadTemp[leafsNumber];
}
WorkloadTemp* workloadBuffer = workloadBufferThread[omp_get_thread_num()];
/// FLOG(FLog::Controller << "[Balance] M2M L2L:\n");
workloadM2M.resize(OctreeHeight);
workloadL2L.resize(OctreeHeight);
typename OctreeClass::Iterator avoidGotoLeftIterator(tree);
avoidGotoLeftIterator.gotoBottomLeft();
avoidGotoLeftIterator.moveUp();
for(int idxLevel = OctreeHeight-2 ; idxLevel >= 2 ; --idxLevel){
/// FLOG(FLog::Controller << "[Balance] \t level " << idxLevel << ":\n");
typename OctreeClass::Iterator octreeIterator(avoidGotoLeftIterator);
avoidGotoLeftIterator.moveUp();
FSize idxCell = 0;
FSize totalWork = 0;
do{
// Keep track of tree iterator
workloadBuffer[idxCell].iterator = octreeIterator;
// Count the nb of children of the current cell
workloadBuffer[idxCell].amountOfWork = 0;
CellClass** child = octreeIterator.getCurrentChild();
for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){
if(child[idxChild]) workloadBuffer[idxCell].amountOfWork += 1;
}
// Keep the total amount of work
totalWork += workloadBuffer[idxCell].amountOfWork;
++idxCell;
} while(octreeIterator.moveRight());
// Now split between thread
generateIntervalFromWorkload(&workloadM2M[idxLevel], totalWork, workloadBuffer, idxCell);
generateIntervalFromWorkload(&workloadL2L[idxLevel], totalWork, workloadBuffer, idxCell);
}
}
{// Do it for the M2L #pragma omp task
/// FLOG(FLog::Controller << "[Balance] M2L:\n"); {
workloadM2L.resize(OctreeHeight); if(workloadBufferThread[omp_get_thread_num()] == nullptr){
typename OctreeClass::Iterator avoidGotoLeftIterator(tree); workloadBufferThread[omp_get_thread_num()] = new WorkloadTemp[leafsNumber];
avoidGotoLeftIterator.gotoBottomLeft(); }
WorkloadTemp* workloadBuffer = workloadBufferThread[omp_get_thread_num()];
const CellClass* neighbors[343]; // Prepare the P2P
const int LeafIndex = OctreeHeight - 1;
for(int idxLevel = OctreeHeight-1 ; idxLevel >= 2 ; --idxLevel){ leafsDataArray.reset(new LeafData[leafsNumber]);
FLOG(FLog::Controller << "[Balance] \t level " << idxLevel << ":\n");
typename OctreeClass::Iterator octreeIterator(avoidGotoLeftIterator); // We need the offset for each color
avoidGotoLeftIterator.moveUp(); int startPosAtShape[SizeShape] = {0};
for(int idxShape = 1 ; idxShape < SizeShape ; ++idxShape){
FSize idxCell = 0; startPosAtShape[idxShape] = startPosAtShape[idxShape-1] + shapeLeaves[idxShape-1];
FSize totalWork = 0;
do{
// Keep track of tree iterator
workloadBuffer[idxCell].iterator = octreeIterator;
// Count the nb of M2L for this cell
workloadBuffer[idxCell].amountOfWork = tree->getInteractionNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(), idxLevel, 1);
// Keep the total amount of work
totalWork += workloadBuffer[idxCell].amountOfWork;
++idxCell;
} while(octreeIterator.moveRight());
// Now split between thread
generateIntervalFromWorkload(&workloadM2L[idxLevel], totalWork, workloadBuffer.get(), idxCell);
}
}
{// Do it for the M2M L2L
/// FLOG(FLog::Controller << "[Balance] M2M L2L:\n");
workloadM2M.resize(OctreeHeight);
workloadL2L.resize(OctreeHeight);
typename OctreeClass::Iterator avoidGotoLeftIterator(tree);
avoidGotoLeftIterator.gotoBottomLeft();
avoidGotoLeftIterator.moveUp();
for(int idxLevel = OctreeHeight-2 ; idxLevel >= 2 ; --idxLevel){
FLOG(FLog::Controller << "[Balance] \t level " << idxLevel << ":\n");
typename OctreeClass::Iterator octreeIterator(avoidGotoLeftIterator);
avoidGotoLeftIterator.moveUp();
FSize idxCell = 0;
FSize totalWork = 0;
do{
// Keep track of tree iterator
workloadBuffer[idxCell].iterator = octreeIterator;
// Count the nb of children of the current cell
workloadBuffer[idxCell].amountOfWork = 0;
CellClass** child = octreeIterator.getCurrentChild();
for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){
if(child[idxChild]) workloadBuffer[idxCell].amountOfWork += 1;
} }
// Keep the total amount of work
totalWork += workloadBuffer[idxCell].amountOfWork;
++idxCell;
} while(octreeIterator.moveRight());
// Now split between thread
generateIntervalFromWorkload(&workloadM2M[idxLevel], totalWork, workloadBuffer.get(), idxCell);
generateIntervalFromWorkload(&workloadL2L[idxLevel], totalWork, workloadBuffer.get(), idxCell);
}
}
{ // Prepare each color
// Prepare the P2P typename OctreeClass::Iterator octreeIterator(tree);
const int LeafIndex = OctreeHeight - 1; octreeIterator.gotoBottomLeft();
leafsDataArray.reset(new LeafData[leafsNumber]);
// We need the offset for each color
int startPosAtShape[SizeShape] = {0};
for(int idxShape = 1 ; idxShape < SizeShape ; ++idxShape){
startPosAtShape[idxShape] = startPosAtShape[idxShape-1] + shapeLeaves[idxShape-1];
}
// Prepare each color FSize workPerShape[SizeShape] = {0};
typename OctreeClass::Iterator octreeIterator(tree);
octreeIterator.gotoBottomLeft();
FSize workPerShape[SizeShape] = {0}; // for each leafs
for(int idxLeaf = 0 ; idxLeaf < leafsNumber ; ++idxLeaf){
// for each leafs const FTreeCoordinate& coord = octreeIterator.getCurrentGlobalCoordinate();
for(int idxLeaf = 0 ; idxLeaf < leafsNumber ; ++idxLeaf){ const int shapePosition = (coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3);
const FTreeCoordinate& coord = octreeIterator.getCurrentGlobalCoordinate();
const int shapePosition = (coord.getX()%3)*9 + (coord.getY()%3)*3 + (coord.getZ()%3);
const int positionToWork = startPosAtShape[shapePosition]++;
leafsDataArray[positionToWork].index = octreeIterator.getCurrentGlobalIndex();
leafsDataArray[positionToWork].coord = coord;
leafsDataArray[positionToWork].targets = octreeIterator.getCurrentListTargets();
leafsDataArray[positionToWork].sources = octreeIterator.getCurrentListSrc();
// For now the cost is simply based on the number of particles
const FSize nbPartInLeaf = octreeIterator.getCurrentListTargets()->getNbParticles();
workloadBuffer[positionToWork].amountOfWork = nbPartInLeaf*nbPartInLeaf;
ContainerClass* neighbors[27];
tree->getLeafsNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(), LeafIndex);
for(int idxNeigh = 0 ; idxNeigh < 27 ; ++idxNeigh){
if(neighbors[idxNeigh]){
workloadBuffer[positionToWork].amountOfWork +=
nbPartInLeaf * neighbors[idxNeigh]->getNbParticles();
}
}
workPerShape[shapePosition] += workloadBuffer[positionToWork].amountOfWork; const int positionToWork = startPosAtShape[shapePosition]++;
octreeIterator.moveRight(); leafsDataArray[positionToWork].index = octreeIterator.getCurrentGlobalIndex();
} leafsDataArray[positionToWork].coord = coord;
leafsDataArray[positionToWork].targets = octreeIterator.getCurrentListTargets();
leafsDataArray[positionToWork].sources = octreeIterator.getCurrentListSrc();
workloadP2P.resize(SizeShape); // For now the cost is simply based on the number of particles
int offsetShape = 0; const FSize nbPartInLeaf = octreeIterator.getCurrentListTargets()->getNbParticles();
workloadBuffer[positionToWork].amountOfWork = nbPartInLeaf*nbPartInLeaf;
ContainerClass* neighbors[27];
tree->getLeafsNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(), LeafIndex);
for(int idxNeigh = 0 ; idxNeigh < 27 ; ++idxNeigh){
if(neighbors[idxNeigh]){
workloadBuffer[positionToWork].amountOfWork +=
nbPartInLeaf * neighbors[idxNeigh]->getNbParticles();
}
}
workPerShape[shapePosition] += workloadBuffer[positionToWork].amountOfWork;
octreeIterator.moveRight();
}
workloadP2P.resize(SizeShape);
int offsetShape = 0;
for(int idxShape = 0 ; idxShape < SizeShape ; ++idxShape){
std::vector<std::pair<int,int>>* intervals = &workloadP2P[idxShape];
const int nbElements = shapeLeaves[idxShape];
const FSize totalWork = workPerShape[idxShape];
// Now split between thread
(*intervals).resize(MaxThreads);
// Ideally each thread will have this
const FSize idealWork = (totalWork/MaxThreads);
// Assign default value for first thread
int idxThread = 0;
(*intervals)[idxThread].first = offsetShape;
FSize assignWork = workloadBuffer[0].amountOfWork;
for(int idxElement = 1+offsetShape ; idxElement < nbElements+offsetShape ; ++idxElement){
if(FMath::Abs((idxThread+1)*idealWork - assignWork) <
FMath::Abs((idxThread+1)*idealWork - assignWork - workloadBuffer[idxElement].amountOfWork)
&& idxThread != MaxThreads-1){
(*intervals)[idxThread].second = idxElement;
/// FLOG(FLog::Controller << "[Balance] Shape " << idxShape << " Thread " << idxThread << " goes from "
/// << (*intervals)[idxThread].first << " to " << (*intervals)[idxThread].second << "\n");
idxThread += 1;
(*intervals)[idxThread].first = idxElement;
}
assignWork += workloadBuffer[idxElement].amountOfWork;
}
(*intervals)[idxThread].second = nbElements + offsetShape;
for(int idxShape = 0 ; idxShape < SizeShape ; ++idxShape){
std::vector<std::pair<int,int>>* intervals = &workloadP2P[idxShape];
const int nbElements = shapeLeaves[idxShape];
const FSize totalWork = workPerShape[idxShape];
// Now split between thread
(*intervals).resize(MaxThreads);
// Ideally each thread will have this
const FSize idealWork = (totalWork/MaxThreads);
// Assign default value for first thread
int idxThread = 0;
(*intervals)[idxThread].first = offsetShape;
FSize assignWork = workloadBuffer[0].amountOfWork;
for(int idxElement = 1+offsetShape ; idxElement < nbElements+offsetShape ; ++idxElement){
if(FMath::Abs((idxThread+1)*idealWork - assignWork) <
FMath::Abs((idxThread+1)*idealWork - assignWork - workloadBuffer[idxElement].amountOfWork)
&& idxThread != MaxThreads-1){
(*intervals)[idxThread].second = idxElement;
/// FLOG(FLog::Controller << "[Balance] Shape " << idxShape << " Thread " << idxThread << " goes from " /// FLOG(FLog::Controller << "[Balance] Shape " << idxShape << " Thread " << idxThread << " goes from "
/// << (*intervals)[idxThread].first << " to " << (*intervals)[idxThread].second << "\n"); /// << (*intervals)[idxThread].first << " to " << (*intervals)[idxThread].second << "\n");
idxThread += 1;
(*intervals)[idxThread].first = idxElement; offsetShape += nbElements;
} }
assignWork += workloadBuffer[idxElement].amountOfWork;
} }
(*intervals)[idxThread].second = nbElements + offsetShape; }
/// FLOG(FLog::Controller << "[Balance] Shape " << idxShape << " Thread " << idxThread << " goes from " #pragma omp barrier
/// << (*intervals)[idxThread].first << " to " << (*intervals)[idxThread].second << "\n"); }
offsetShape += nbElements; for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
} delete[] workloadBufferThread[idxThread];
} }
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment