Commit 23b9e49c authored by BRAMAS Berenger
Browse files

add task timer into starpu

parent 2693b889
......@@ -362,7 +362,7 @@ protected:
#pragma omp task default(shared) firstprivate(leafCells, cellPoles, containers) depend(inout: cellPoles[0]) priority_if_supported(FGroupTaskDepAlgorithm_Prio_P2M)
{
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(&taskTimeRecorder, (leafCells->getStartingIndex() << 16) | (0<<8) | 0, "P2M"));
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(omp_get_thread_num(), &taskTimeRecorder, (leafCells->getStartingIndex() << 16) | (0<<8) | 0, "P2M"));
KernelClass*const kernel = kernels[omp_get_thread_num()];
for(int leafIdx = 0 ; leafIdx < leafCells->getNumberOfCellsInBlock() ; ++leafIdx){
......@@ -407,7 +407,7 @@ protected:
#pragma omp task default(none) firstprivate(idxLevel, currentCells, cellPoles, subCellGroup, subCellGroupPoles) depend(inout: cellPoles[0]) depend(in: subCellGroupPoles[0]) priority_if_supported(FGroupTaskDepAlgorithm_Prio_M2M)
{
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(&taskTimeRecorder, (currentCells->getStartingIndex() << 16) | (idxLevel<<8) | 1, "M2M"));
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(omp_get_thread_num(), &taskTimeRecorder, (currentCells->getStartingIndex() << 16) | (idxLevel<<8) | 1, "M2M"));
KernelClass*const kernel = kernels[omp_get_thread_num()];
const MortonIndex firstParent = FMath::Max(currentCells->getStartingIndex(), subCellGroup->getStartingIndex()>>3);
const MortonIndex lastParent = FMath::Min(currentCells->getEndingIndex()-1, (subCellGroup->getEndingIndex()-1)>>3);
......@@ -482,7 +482,7 @@ protected:
#pragma omp task default(none) firstprivate(currentCells, cellPoles, cellLocals, idxLevel) depend(commute_if_supported: cellLocals[0]) depend(in: cellPoles[0]) priority_if_supported(idxLevel==FAbstractAlgorithm::lowerWorkingLevel-1?FGroupTaskDepAlgorithm_Prio_M2L:FGroupTaskDepAlgorithm_Prio_M2L_High)
{
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(&taskTimeRecorder, (currentCells->getStartingIndex() << 16) | (idxLevel<<8) | 2, "M2L"));
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(omp_get_thread_num(), &taskTimeRecorder, (currentCells->getStartingIndex() << 16) | (idxLevel<<8) | 2, "M2L"));
const MortonIndex blockStartIdx = currentCells->getStartingIndex();
const MortonIndex blockEndIdx = currentCells->getEndingIndex();
KernelClass*const kernel = kernels[omp_get_thread_num()];
......@@ -545,7 +545,7 @@ protected:
#pragma omp task default(none) firstprivate(currentCells, cellLocals, outsideInteractions, cellsOther, cellOtherPoles, idxLevel) depend(commute_if_supported: cellLocals[0]) depend(in: cellOtherPoles[0]) priority_if_supported(idxLevel==FAbstractAlgorithm::lowerWorkingLevel-1?FGroupTaskDepAlgorithm_Prio_M2L:FGroupTaskDepAlgorithm_Prio_M2L_High)
{
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(&taskTimeRecorder, (cellsOther->getStartingIndex()) << 50 | (currentCells->getStartingIndex() << 16) | (idxLevel<<8) | 3, "M2L ext"));
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(omp_get_thread_num(), &taskTimeRecorder, (cellsOther->getStartingIndex()) << 50 | (currentCells->getStartingIndex() << 16) | (idxLevel<<8) | 3, "M2L-ext"));
KernelClass*const kernel = kernels[omp_get_thread_num()];
for(int outInterIdx = 0 ; outInterIdx < int(outsideInteractions->size()) ; ++outInterIdx){
......@@ -561,7 +561,7 @@ protected:
#pragma omp task default(none) firstprivate(currentCells, cellPoles, outsideInteractions, cellsOther, cellOtherLocals, idxLevel) depend(commute_if_supported: cellOtherLocals[0]) depend(in: cellPoles[0]) priority_if_supported(idxLevel==FAbstractAlgorithm::lowerWorkingLevel-1?FGroupTaskDepAlgorithm_Prio_M2L:FGroupTaskDepAlgorithm_Prio_M2L_High)
{
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(&taskTimeRecorder, (currentCells->getStartingIndex()) << 50 | (cellsOther->getStartingIndex() << 16) | (idxLevel<<8) | 3, "M2L ext"));
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(omp_get_thread_num(), &taskTimeRecorder, (currentCells->getStartingIndex()) << 50 | (cellsOther->getStartingIndex() << 16) | (idxLevel<<8) | 3, "M2L-ext"));
KernelClass*const kernel = kernels[omp_get_thread_num()];
for(int outInterIdx = 0 ; outInterIdx < int(outsideInteractions->size()) ; ++outInterIdx){
......@@ -620,7 +620,7 @@ protected:
#pragma omp task default(none) firstprivate(idxLevel, currentCells, cellLocals, subCellGroup, subCellLocalGroupsLocal) depend(commute_if_supported: subCellLocalGroupsLocal[0]) depend(in: cellLocals[0]) priority_if_supported(FGroupTaskDepAlgorithm_Prio_L2L)
{
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(&taskTimeRecorder, (currentCells->getStartingIndex() << 16) | (idxLevel<<8) | 4, "L2L"));
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(omp_get_thread_num(), &taskTimeRecorder, (currentCells->getStartingIndex() << 16) | (idxLevel<<8) | 4, "L2L"));
KernelClass*const kernel = kernels[omp_get_thread_num()];
const MortonIndex firstParent = FMath::Max(currentCells->getStartingIndex(), subCellGroup->getStartingIndex()>>3);
......@@ -695,7 +695,7 @@ protected:
#pragma omp task default(none) firstprivate(containers, containersDown) depend(commute_if_supported: containersDown[0]) priority_if_supported(FGroupTaskDepAlgorithm_Prio_P2P_Big)
{
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(&taskTimeRecorder, (containers->getStartingIndex() << 16) | (0<<8) | 5, "P2P"));
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(omp_get_thread_num(), &taskTimeRecorder, (containers->getStartingIndex() << 16) | (0<<8) | 5, "P2P"));
const MortonIndex blockStartIdx = containers->getStartingIndex();
const MortonIndex blockEndIdx = containers->getEndingIndex();
KernelClass*const kernel = kernels[omp_get_thread_num()];
......@@ -753,7 +753,7 @@ protected:
#pragma omp task default(none) firstprivate(containers, containersDown, containersOther, containersOtherDown, outsideInteractions) depend(commute_if_supported: containersOtherDown[0], containersDown[0]) priority_if_supported(FGroupTaskDepAlgorithm_Prio_P2P_Small)
{
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(&taskTimeRecorder, (containersOther->getStartingIndex()) << 50 | (containers->getStartingIndex() << 16) | (0<<8) | 6, "P2P ext"));
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(omp_get_thread_num(), &taskTimeRecorder, (containersOther->getStartingIndex()) << 50 | (containers->getStartingIndex() << 16) | (0<<8) | 6, "P2P-ext"));
KernelClass*const kernel = kernels[omp_get_thread_num()];
for(int outInterIdx = 0 ; outInterIdx < int(outsideInteractions->size()) ; ++outInterIdx){
ParticleContainerClass interParticles = containersOther->template getLeaf<ParticleContainerClass>((*outsideInteractions)[outInterIdx].outsideIdxInBlock);
......@@ -798,7 +798,7 @@ protected:
#pragma omp task default(shared) firstprivate(leafCells, cellLocals, containers, containersDown) depend(commute_if_supported: containersDown[0]) depend(in: cellLocals[0]) priority_if_supported(FGroupTaskDepAlgorithm_Prio_L2P)
{
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(&taskTimeRecorder, (leafCells->getStartingIndex() << 16) | (0<<8) | 7, "L2P"));
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(omp_get_thread_num(), &taskTimeRecorder, (leafCells->getStartingIndex() << 16) | (0<<8) | 7, "L2P"));
KernelClass*const kernel = kernels[omp_get_thread_num()];
for(int cellIdx = 0 ; cellIdx < leafCells->getNumberOfCellsInBlock() ; ++cellIdx){
......
......@@ -35,6 +35,8 @@
#include "FStarPUUtils.hpp"
#include "../../Utils/FTaskTimer.hpp"
template <class CellContainerClass, class CellClass, class KernelClass,
class ParticleGroupClass, class ParticleContainerClass>
class FStarPUCpuWrapper {
......@@ -51,14 +53,25 @@ protected:
const int treeHeight;
KernelClass* kernels[STARPU_MAXCPUS]; //< The kernels
#ifdef SCALFMM_TIME_OMPTASKS
FTaskTimer taskTimeRecorder;
#endif
public:
/**
 * Build the CPU wrapper for a tree of height inTreeHeight.
 * Two forms of the signature appear in this hunk: one opens the body directly,
 * the other additionally constructs taskTimeRecorder with STARPU_MAXCPUS slots
 * when SCALFMM_TIME_OMPTASKS is defined.
 * @param inTreeHeight value stored in treeHeight.
 */
FStarPUCpuWrapper(const int inTreeHeight): treeHeight(inTreeHeight){
FStarPUCpuWrapper(const int inTreeHeight): treeHeight(inTreeHeight)
#ifdef SCALFMM_TIME_OMPTASKS
, taskTimeRecorder(STARPU_MAXCPUS)
#endif
{
// Zero-fill the whole kernel table; initKernel() asserts that a slot is still null before filling it.
memset(kernels, 0, sizeof(KernelClass*)*STARPU_MAXCPUS);
}
/**
 * Create the kernel of one worker and, when SCALFMM_TIME_OMPTASKS is defined,
 * the matching slot of the task timer.
 * @param workerId index of the worker; its entry in kernels must still be null.
 * @param originalKernel kernel that is copy-constructed for this worker.
 */
void initKernel(const int workerId, KernelClass* originalKernel){
    FAssertLF(kernels[workerId] == nullptr);
    kernels[workerId] = new KernelClass(*originalKernel);
#ifdef SCALFMM_TIME_OMPTASKS
    // Use the same index as the kernel table so both per-worker tables always agree.
    // starpu_worker_get_id() is -1 when the caller is not a StarPU worker, which
    // would index the timer table out of bounds.
    taskTimeRecorder.init(workerId);
#endif
}
void releaseKernel(const int workerId){
......@@ -88,6 +101,7 @@ public:
}
void bottomPassPerform(CellContainerClass* leafCells, ParticleGroupClass* containers){
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(starpu_worker_get_id(), &taskTimeRecorder, (leafCells->getStartingIndex() << 16) | (0<<8) | 0, "P2M"));
FAssertLF(leafCells->getNumberOfCellsInBlock() == containers->getNumberOfLeavesInBlock());
KernelClass*const kernel = kernels[starpu_worker_get_id()];
......@@ -126,6 +140,7 @@ public:
void upwardPassPerform(CellContainerClass*const currentCells,
CellContainerClass* subCellGroup,
const int idxLevel){
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(starpu_worker_get_id(), &taskTimeRecorder, (currentCells->getStartingIndex() << 16) | (idxLevel<<8) | 1, "M2M"));
KernelClass*const kernel = kernels[starpu_worker_get_id()];
const MortonIndex firstParent = FMath::Max(currentCells->getStartingIndex(), subCellGroup->getStartingIndex()>>3);
......@@ -228,6 +243,7 @@ public:
}
void transferInPassPerform(CellContainerClass*const currentCells, const int idxLevel){
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(starpu_worker_get_id(), &taskTimeRecorder, (currentCells->getStartingIndex() << 16) | (idxLevel<<8) | 2, "M2L"));
const MortonIndex blockStartIdx = currentCells->getStartingIndex();
const MortonIndex blockEndIdx = currentCells->getEndingIndex();
KernelClass*const kernel = kernels[starpu_worker_get_id()];
......@@ -293,6 +309,7 @@ public:
KernelClass*const kernel = kernels[starpu_worker_get_id()];
if(mode == 1){
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(starpu_worker_get_id(), &taskTimeRecorder, (cellsOther->getStartingIndex()) << 50 | (currentCells->getStartingIndex() << 16) | (idxLevel<<8) | 3, "M2L-ext"));
for(int outInterIdx = 0 ; outInterIdx < int(outsideInteractions->size()) ; ++outInterIdx){
CellClass interCell = cellsOther->getUpCell((*outsideInteractions)[outInterIdx].outsideIdxInBlock);
FAssertLF(interCell.getMortonIndex() == (*outsideInteractions)[outInterIdx].outIndex);
......@@ -304,6 +321,7 @@ public:
}
}
else{
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(starpu_worker_get_id(), &taskTimeRecorder, (currentCells->getStartingIndex()) << 50 | (cellsOther->getStartingIndex() << 16) | (idxLevel<<8) | 3, "M2L-ext"));
for(int outInterIdx = 0 ; outInterIdx < int(outsideInteractions->size()) ; ++outInterIdx){
CellClass cell = cellsOther->getUpCell((*outsideInteractions)[outInterIdx].insideIdxInBlock);
FAssertLF(cell.getMortonIndex() == (*outsideInteractions)[outInterIdx].insideIndex);
......@@ -343,6 +361,7 @@ public:
void downardPassPerform(CellContainerClass*const currentCells,
CellContainerClass* subCellGroup,
const int idxLevel){
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(starpu_worker_get_id(), &taskTimeRecorder, (currentCells->getStartingIndex() << 16) | (idxLevel<<8) | 4, "L2L"));
KernelClass*const kernel = kernels[starpu_worker_get_id()];
const MortonIndex firstParent = FMath::Max(currentCells->getStartingIndex(), subCellGroup->getStartingIndex()>>3);
......@@ -436,6 +455,7 @@ public:
}
void directInPassPerform(ParticleGroupClass* containers){
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(starpu_worker_get_id(), &taskTimeRecorder, (containers->getStartingIndex() << 16) | (0<<8) | 5, "P2P"));
const MortonIndex blockStartIdx = containers->getStartingIndex();
const MortonIndex blockEndIdx = containers->getEndingIndex();
KernelClass*const kernel = kernels[starpu_worker_get_id()];
......@@ -486,7 +506,8 @@ public:
void directInoutPassPerform(ParticleGroupClass* containers, ParticleGroupClass* containersOther,
const std::vector<OutOfBlockInteraction>* outsideInteractions){
KernelClass*const kernel = kernels[omp_get_thread_num()];
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(starpu_worker_get_id(), &taskTimeRecorder, (containersOther->getStartingIndex()) << 50 | (containers->getStartingIndex() << 16) | (0<<8) | 6, "P2P-ext"));
KernelClass*const kernel = kernels[starpu_worker_get_id()];
for(int outInterIdx = 0 ; outInterIdx < int(outsideInteractions->size()) ; ++outInterIdx){
ParticleContainerClass interParticles = containersOther->template getLeaf<ParticleContainerClass>((*outsideInteractions)[outInterIdx].outsideIdxInBlock);
ParticleContainerClass particles = containers->template getLeaf<ParticleContainerClass>((*outsideInteractions)[outInterIdx].insideIdxInBlock);
......@@ -524,6 +545,7 @@ public:
}
void mergePassPerform(CellContainerClass* leafCells, ParticleGroupClass* containers){
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(starpu_worker_get_id(), &taskTimeRecorder, (leafCells->getStartingIndex() << 16) | (0<<8) | 7, "L2P"));
FAssertLF(leafCells->getNumberOfCellsInBlock() == containers->getNumberOfLeavesInBlock());
KernelClass*const kernel = kernels[starpu_worker_get_id()];
......
......@@ -8,7 +8,6 @@
#include "../Containers/FVector.hpp"
#include <unordered_set>
#include <omp.h>
#ifdef SCALFMM_TIME_OMPTASKS
......@@ -45,16 +44,17 @@ protected:
public:
/**
 * Create a recorder and allocate its table of per-thread event slots.
 * Two variants of the constructor appear in this hunk:
 *  - one defaults inNbThreads to -1 and falls back to omp_get_max_threads() when the
 *    value is not positive, then allocates one ThreadData per thread in a parallel region;
 *  - the other uses inNbThreads as given and zero-fills the table, leaving every slot
 *    null until init() assigns it.
 * NOTE(review): the variant without the fallback passes inNbThreads straight to new[];
 * confirm callers never pass a non-positive count.
 * @param inNbThreads number of slots in threadEvents.
 */
explicit FTaskTimer(const int inNbThreads = -1)
: nbThreads(inNbThreads>0?inNbThreads:omp_get_max_threads()), threadEvents(nullptr),
explicit FTaskTimer(const int inNbThreads)
: nbThreads(inNbThreads), threadEvents(nullptr),
startingTime(0) {
FLOG( FLog::Controller << "\tFTaskTimer is used\n" );
threadEvents = new ThreadData*[nbThreads];
#pragma omp parallel num_threads(nbThreads)
{
threadEvents[omp_get_thread_num()] = new ThreadData;
}
// Set every slot pointer to zero bytes.
memset(threadEvents, 0, sizeof(threadEvents[0])*nbThreads);
}
/**
 * Allocate the event storage of one thread slot.
 * Calling it again for a slot that is already allocated is a no-op, so the
 * existing ThreadData (and the events it holds) is neither leaked nor lost.
 * @param threadId index of the slot in threadEvents.
 */
void init(const int threadId){
    if(threadEvents[threadId] == nullptr){
        threadEvents[threadId] = new ThreadData;
    }
}
~FTaskTimer(){
......@@ -66,9 +66,8 @@ public:
/**
 * Clear the events recorded so far and take a new time origin.
 * Two variants of the reset appear in this hunk: one clears each slot from inside
 * an OpenMP parallel region, the other walks all nbThreads slots sequentially.
 */
void start(){
FLOG( FLog::Controller << "\tFTaskTimer starts\n" );
#pragma omp parallel num_threads(nbThreads)
{
threadEvents[omp_get_thread_num()]->events.clear();
// NOTE(review): this loop dereferences every slot below nbThreads. With the
// zero-filling constructor a slot stays null until init(threadId) is called for it,
// so confirm that all nbThreads slots are initialized before start() runs.
for(int idxThread = 0 ; idxThread < nbThreads ; ++idxThread){
threadEvents[idxThread]->events.clear();
}
// Time origin that ScopeEvent copies into measureStartingTime.
startingTime = FTic::GetTime();
}
......@@ -118,18 +117,18 @@ public:
char taskText[MaxTextLength];
public:
/**
 * Begin an event: capture the current time, copy the manager's time origin and
 * select the slot of threadEvents this event is attached to.
 * Two variants of the signature appear in this hunk: one selects the slot with
 * omp_get_thread_num(), the other with the caller-supplied threadId.
 * @param threadId index of the slot in eventsManager->threadEvents.
 * @param eventsManager recorder that owns the slots; dereferenced without a null check.
 * @param inTaskId identifier stored in taskId.
 * @param inText label copied into taskText.
 */
ScopeEvent(FTaskTimer* eventsManager, const long long int inTaskId, const char inText[MaxTextLength])
ScopeEvent(const int threadId, FTaskTimer* eventsManager, const long long int inTaskId, const char inText[MaxTextLength])
: eventStartingTime(FTic::GetTime()), measureStartingTime(eventsManager->startingTime),
myEvents(eventsManager->threadEvents[omp_get_thread_num()]),
myEvents(eventsManager->threadEvents[threadId]),
taskId(inTaskId){
taskText[0] = '\0';
// strncpy copies at most MaxTextLength characters and appends no terminator when
// inText is that long or longer.
// NOTE(review): confirm every label is shorter than MaxTextLength.
strncpy(taskText, inText, MaxTextLength);
}
/**
 * Begin an event whose label is built with snprintf from a format and arguments.
 * Time capture and slot selection match the plain-text constructor; two variants of
 * the signature appear in this hunk (slot chosen by omp_get_thread_num() or by threadId).
 * NOTE(review): inTextFormat is declared as a single `const char` yet is passed as the
 * format argument of snprintf; it looks like it should be `const char*` -- confirm,
 * since the template would not compile once instantiated.
 * @param threadId index of the slot in eventsManager->threadEvents.
 * @param eventsManager recorder that owns the slots; dereferenced without a null check.
 * @param inTaskId identifier stored in taskId.
 * @param inTextFormat format handed to snprintf.
 * @param fparam first format argument.
 * @param params remaining format arguments.
 */
template <class FirstParameters, class ... Parameters>
ScopeEvent(FTaskTimer* eventsManager, const long long int inTaskId, const char inTextFormat, FirstParameters fparam, Parameters ... params)
ScopeEvent(const int threadId, FTaskTimer* eventsManager, const long long int inTaskId, const char inTextFormat, FirstParameters fparam, Parameters ... params)
: eventStartingTime(FTic::GetTime()), measureStartingTime(eventsManager->startingTime),
myEvents(eventsManager->threadEvents[omp_get_thread_num()]),
myEvents(eventsManager->threadEvents[threadId]),
taskId(inTaskId){
// snprintf writes at most MaxTextLength bytes into taskText, terminator included.
snprintf(taskText, MaxTextLength, inTextFormat, fparam, params...);
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment