Commit a0d98bea authored by BRAMAS Berenger
Browse files

ensure unique keys for task timing

parent 0c3f83af
......@@ -362,7 +362,7 @@ protected:
#pragma omp task default(shared) firstprivate(leafCells, cellPoles, containers) depend(inout: cellPoles[0]) priority_if_supported(FGroupTaskDepAlgorithm_Prio_P2M)
{
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(omp_get_thread_num(), &taskTimeRecorder, (leafCells->getStartingIndex() << 16) | (0<<8) | 0, "P2M"));
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(omp_get_thread_num(), &taskTimeRecorder, leafCells->getStartingIndex() * 20 * 8, "P2M"));
KernelClass*const kernel = kernels[omp_get_thread_num()];
for(int leafIdx = 0 ; leafIdx < leafCells->getNumberOfCellsInBlock() ; ++leafIdx){
......@@ -407,10 +407,10 @@ protected:
#pragma omp task default(none) firstprivate(idxLevel, currentCells, cellPoles, subCellGroup, subCellGroupPoles) depend(inout: cellPoles[0]) depend(in: subCellGroupPoles[0]) priority_if_supported(FGroupTaskDepAlgorithm_Prio_M2M)
{
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(omp_get_thread_num(), &taskTimeRecorder, (currentCells->getStartingIndex() << 16) | (idxLevel<<8) | 1, "M2M"));
KernelClass*const kernel = kernels[omp_get_thread_num()];
const MortonIndex firstParent = FMath::Max(currentCells->getStartingIndex(), subCellGroup->getStartingIndex()>>3);
const MortonIndex lastParent = FMath::Min(currentCells->getEndingIndex()-1, (subCellGroup->getEndingIndex()-1)>>3);
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(omp_get_thread_num(), &taskTimeRecorder, ((lastParent * 20) + idxLevel) * 8 + 1, "M2M"));
int idxParentCell = currentCells->getCellIndex(firstParent);
FAssertLF(idxParentCell != -1);
......@@ -482,7 +482,7 @@ protected:
#pragma omp task default(none) firstprivate(currentCells, cellPoles, cellLocals, idxLevel) depend(commute_if_supported: cellLocals[0]) depend(in: cellPoles[0]) priority_if_supported(idxLevel==FAbstractAlgorithm::lowerWorkingLevel-1?FGroupTaskDepAlgorithm_Prio_M2L:FGroupTaskDepAlgorithm_Prio_M2L_High)
{
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(omp_get_thread_num(), &taskTimeRecorder, (currentCells->getStartingIndex() << 16) | (idxLevel<<8) | 2, "M2L"));
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(omp_get_thread_num(), &taskTimeRecorder, ((currentCells->getStartingIndex() *20) + idxLevel ) * 8 + 2, "M2L"));
const MortonIndex blockStartIdx = currentCells->getStartingIndex();
const MortonIndex blockEndIdx = currentCells->getEndingIndex();
KernelClass*const kernel = kernels[omp_get_thread_num()];
......@@ -545,7 +545,7 @@ protected:
#pragma omp task default(none) firstprivate(currentCells, cellLocals, outsideInteractions, cellsOther, cellOtherPoles, idxLevel) depend(commute_if_supported: cellLocals[0]) depend(in: cellOtherPoles[0]) priority_if_supported(idxLevel==FAbstractAlgorithm::lowerWorkingLevel-1?FGroupTaskDepAlgorithm_Prio_M2L:FGroupTaskDepAlgorithm_Prio_M2L_High)
{
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(omp_get_thread_num(), &taskTimeRecorder, (cellsOther->getStartingIndex()) << 50 | (currentCells->getStartingIndex() << 16) | (idxLevel<<8) | 3, "M2L-ext"));
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(omp_get_thread_num(), &taskTimeRecorder, (((currentCells->getStartingIndex()<<1) ^ cellsOther->getStartingIndex()) * 20 + idxLevel) * 8 + 3, "M2L-ext"));
KernelClass*const kernel = kernels[omp_get_thread_num()];
for(int outInterIdx = 0 ; outInterIdx < int(outsideInteractions->size()) ; ++outInterIdx){
......@@ -561,7 +561,7 @@ protected:
#pragma omp task default(none) firstprivate(currentCells, cellPoles, outsideInteractions, cellsOther, cellOtherLocals, idxLevel) depend(commute_if_supported: cellOtherLocals[0]) depend(in: cellPoles[0]) priority_if_supported(idxLevel==FAbstractAlgorithm::lowerWorkingLevel-1?FGroupTaskDepAlgorithm_Prio_M2L:FGroupTaskDepAlgorithm_Prio_M2L_High)
{
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(omp_get_thread_num(), &taskTimeRecorder, (currentCells->getStartingIndex()) << 50 | (cellsOther->getStartingIndex() << 16) | (idxLevel<<8) | 3, "M2L-ext"));
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(omp_get_thread_num(), &taskTimeRecorder, (((currentCells->getStartingIndex()) ^ (cellsOther->getStartingIndex()<<1)) * 20 + idxLevel) * 8 + 3, "M2L-ext"));
KernelClass*const kernel = kernels[omp_get_thread_num()];
for(int outInterIdx = 0 ; outInterIdx < int(outsideInteractions->size()) ; ++outInterIdx){
......@@ -620,11 +620,11 @@ protected:
#pragma omp task default(none) firstprivate(idxLevel, currentCells, cellLocals, subCellGroup, subCellLocalGroupsLocal) depend(commute_if_supported: subCellLocalGroupsLocal[0]) depend(in: cellLocals[0]) priority_if_supported(FGroupTaskDepAlgorithm_Prio_L2L)
{
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(omp_get_thread_num(), &taskTimeRecorder, (currentCells->getStartingIndex() << 16) | (idxLevel<<8) | 4, "L2L"));
KernelClass*const kernel = kernels[omp_get_thread_num()];
const MortonIndex firstParent = FMath::Max(currentCells->getStartingIndex(), subCellGroup->getStartingIndex()>>3);
const MortonIndex lastParent = FMath::Min(currentCells->getEndingIndex()-1, (subCellGroup->getEndingIndex()-1)>>3);
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(omp_get_thread_num(), &taskTimeRecorder, ((lastParent * 20) + idxLevel) * 8 + 4, "L2L"));
int idxParentCell = currentCells->getCellIndex(firstParent);
FAssertLF(idxParentCell != -1);
......@@ -695,7 +695,7 @@ protected:
#pragma omp task default(none) firstprivate(containers, containersDown) depend(commute_if_supported: containersDown[0]) priority_if_supported(FGroupTaskDepAlgorithm_Prio_P2P_Big)
{
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(omp_get_thread_num(), &taskTimeRecorder, (containers->getStartingIndex() << 16) | (0<<8) | 5, "P2P"));
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(omp_get_thread_num(), &taskTimeRecorder, containers->getStartingIndex()*20*8 + 5, "P2P"));
const MortonIndex blockStartIdx = containers->getStartingIndex();
const MortonIndex blockEndIdx = containers->getEndingIndex();
KernelClass*const kernel = kernels[omp_get_thread_num()];
......@@ -753,7 +753,7 @@ protected:
#pragma omp task default(none) firstprivate(containers, containersDown, containersOther, containersOtherDown, outsideInteractions) depend(commute_if_supported: containersOtherDown[0], containersDown[0]) priority_if_supported(FGroupTaskDepAlgorithm_Prio_P2P_Small)
{
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(omp_get_thread_num(), &taskTimeRecorder, (containersOther->getStartingIndex()) << 50 | (containers->getStartingIndex() << 16) | (0<<8) | 6, "P2P-ext"));
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(omp_get_thread_num(), &taskTimeRecorder, (containersOther->getStartingIndex() ^ containers->getStartingIndex())*20*8 + 6, "P2P-ext"));
KernelClass*const kernel = kernels[omp_get_thread_num()];
for(int outInterIdx = 0 ; outInterIdx < int(outsideInteractions->size()) ; ++outInterIdx){
ParticleContainerClass interParticles = containersOther->template getLeaf<ParticleContainerClass>((*outsideInteractions)[outInterIdx].outsideIdxInBlock);
......@@ -798,7 +798,7 @@ protected:
#pragma omp task default(shared) firstprivate(leafCells, cellLocals, containers, containersDown) depend(commute_if_supported: containersDown[0]) depend(in: cellLocals[0]) priority_if_supported(FGroupTaskDepAlgorithm_Prio_L2P)
{
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(omp_get_thread_num(), &taskTimeRecorder, (leafCells->getStartingIndex() << 16) | (0<<8) | 7, "L2P"));
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(omp_get_thread_num(), &taskTimeRecorder, (leafCells->getStartingIndex()*20*8) + 7, "L2P"));
KernelClass*const kernel = kernels[omp_get_thread_num()];
for(int cellIdx = 0 ; cellIdx < leafCells->getNumberOfCellsInBlock() ; ++cellIdx){
......
......@@ -303,6 +303,10 @@ protected:
FLOG( FLog::Controller << "\tStart FGroupTaskStarPUAlgorithm\n" );
const bool directOnly = (tree->getHeight() <= 2);
#ifdef STARPU_USE_CPU
FTIME_TASKS(cpuWrapper.taskTimeRecorder.start());
#endif
starpu_resume();
if(operationsToProceed & FFmmP2M && !directOnly) bottomPass();
......@@ -325,6 +329,11 @@ protected:
starpu_task_wait_for_all();
starpu_pause();
#ifdef STARPU_USE_CPU
FTIME_TASKS(cpuWrapper.taskTimeRecorder.end());
FTIME_TASKS(cpuWrapper.taskTimeRecorder.saveToDisk("/tmp/taskstime-FGroupTaskStarPUAlgorithm.txt"));
#endif
}
......
......@@ -53,11 +53,12 @@ protected:
const int treeHeight;
KernelClass* kernels[STARPU_MAXCPUS]; //< The kernels
public:
#ifdef SCALFMM_TIME_OMPTASKS
FTaskTimer taskTimeRecorder;
#endif
public:
FStarPUCpuWrapper(const int inTreeHeight): treeHeight(inTreeHeight)
#ifdef SCALFMM_TIME_OMPTASKS
, taskTimeRecorder(STARPU_MAXCPUS)
......@@ -101,7 +102,7 @@ public:
}
void bottomPassPerform(CellContainerClass* leafCells, ParticleGroupClass* containers){
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(starpu_worker_get_id(), &taskTimeRecorder, (leafCells->getStartingIndex() << 16) | (0<<8) | 0, "P2M"));
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(starpu_worker_get_id(), &taskTimeRecorder, leafCells->getStartingIndex() * 20 * 8, "P2M"));
FAssertLF(leafCells->getNumberOfCellsInBlock() == containers->getNumberOfLeavesInBlock());
KernelClass*const kernel = kernels[starpu_worker_get_id()];
......@@ -140,11 +141,11 @@ public:
void upwardPassPerform(CellContainerClass*const currentCells,
CellContainerClass* subCellGroup,
const int idxLevel){
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(starpu_worker_get_id(), &taskTimeRecorder, (currentCells->getStartingIndex() << 16) | (idxLevel<<8) | 1, "M2M"));
KernelClass*const kernel = kernels[starpu_worker_get_id()];
const MortonIndex firstParent = FMath::Max(currentCells->getStartingIndex(), subCellGroup->getStartingIndex()>>3);
const MortonIndex lastParent = FMath::Min(currentCells->getEndingIndex()-1, (subCellGroup->getEndingIndex()-1)>>3);
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(starpu_worker_get_id(), &taskTimeRecorder, ((lastParent * 20) + idxLevel) * 8 + 1, "M2M"));
int idxParentCell = currentCells->getCellIndex(firstParent);
FAssertLF(idxParentCell != -1);
......@@ -243,7 +244,7 @@ public:
}
void transferInPassPerform(CellContainerClass*const currentCells, const int idxLevel){
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(starpu_worker_get_id(), &taskTimeRecorder, (currentCells->getStartingIndex() << 16) | (idxLevel<<8) | 2, "M2L"));
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(starpu_worker_get_id(), &taskTimeRecorder, ((currentCells->getStartingIndex() *20) + idxLevel ) * 8 + 2, "M2L"));
const MortonIndex blockStartIdx = currentCells->getStartingIndex();
const MortonIndex blockEndIdx = currentCells->getEndingIndex();
KernelClass*const kernel = kernels[starpu_worker_get_id()];
......@@ -306,10 +307,10 @@ public:
const int idxLevel,
const std::vector<OutOfBlockInteraction>* outsideInteractions,
const int mode){
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(starpu_worker_get_id(), &taskTimeRecorder, (((currentCells->getStartingIndex()<<1) ^ cellsOther->getStartingIndex()) * 20 + idxLevel) * 8 + 3, "M2L-ext"));
KernelClass*const kernel = kernels[starpu_worker_get_id()];
if(mode == 1){
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(starpu_worker_get_id(), &taskTimeRecorder, (cellsOther->getStartingIndex()) << 50 | (currentCells->getStartingIndex() << 16) | (idxLevel<<8) | 3, "M2L-ext"));
for(int outInterIdx = 0 ; outInterIdx < int(outsideInteractions->size()) ; ++outInterIdx){
CellClass interCell = cellsOther->getUpCell((*outsideInteractions)[outInterIdx].outsideIdxInBlock);
FAssertLF(interCell.getMortonIndex() == (*outsideInteractions)[outInterIdx].outIndex);
......@@ -321,7 +322,6 @@ public:
}
}
else{
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(starpu_worker_get_id(), &taskTimeRecorder, (currentCells->getStartingIndex()) << 50 | (cellsOther->getStartingIndex() << 16) | (idxLevel<<8) | 3, "M2L-ext"));
for(int outInterIdx = 0 ; outInterIdx < int(outsideInteractions->size()) ; ++outInterIdx){
CellClass cell = cellsOther->getUpCell((*outsideInteractions)[outInterIdx].insideIdxInBlock);
FAssertLF(cell.getMortonIndex() == (*outsideInteractions)[outInterIdx].insideIndex);
......@@ -361,11 +361,11 @@ public:
void downardPassPerform(CellContainerClass*const currentCells,
CellContainerClass* subCellGroup,
const int idxLevel){
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(starpu_worker_get_id(), &taskTimeRecorder, (currentCells->getStartingIndex() << 16) | (idxLevel<<8) | 4, "L2L"));
KernelClass*const kernel = kernels[starpu_worker_get_id()];
const MortonIndex firstParent = FMath::Max(currentCells->getStartingIndex(), subCellGroup->getStartingIndex()>>3);
const MortonIndex lastParent = FMath::Min(currentCells->getEndingIndex()-1, (subCellGroup->getEndingIndex()-1)>>3);
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(starpu_worker_get_id(), &taskTimeRecorder, ((lastParent * 20) + idxLevel) * 8 + 4, "L2L"));
int idxParentCell = currentCells->getCellIndex(firstParent);
FAssertLF(idxParentCell != -1);
......@@ -455,7 +455,7 @@ public:
}
void directInPassPerform(ParticleGroupClass* containers){
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(starpu_worker_get_id(), &taskTimeRecorder, (containers->getStartingIndex() << 16) | (0<<8) | 5, "P2P"));
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(starpu_worker_get_id(), &taskTimeRecorder, containers->getStartingIndex()*20*8 + 5, "P2P"));
const MortonIndex blockStartIdx = containers->getStartingIndex();
const MortonIndex blockEndIdx = containers->getEndingIndex();
KernelClass*const kernel = kernels[starpu_worker_get_id()];
......@@ -506,7 +506,7 @@ public:
void directInoutPassPerform(ParticleGroupClass* containers, ParticleGroupClass* containersOther,
const std::vector<OutOfBlockInteraction>* outsideInteractions){
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(starpu_worker_get_id(), &taskTimeRecorder, (containersOther->getStartingIndex()) << 50 | (containers->getStartingIndex() << 16) | (0<<8) | 6, "P2P-ext"));
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(starpu_worker_get_id(), &taskTimeRecorder, (containersOther->getStartingIndex() ^ containers->getStartingIndex())*20*8 + 6, "P2P-ext"));
KernelClass*const kernel = kernels[starpu_worker_get_id()];
for(int outInterIdx = 0 ; outInterIdx < int(outsideInteractions->size()) ; ++outInterIdx){
ParticleContainerClass interParticles = containersOther->template getLeaf<ParticleContainerClass>((*outsideInteractions)[outInterIdx].outsideIdxInBlock);
......@@ -545,7 +545,7 @@ public:
}
void mergePassPerform(CellContainerClass* leafCells, ParticleGroupClass* containers){
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(starpu_worker_get_id(), &taskTimeRecorder, (leafCells->getStartingIndex() << 16) | (0<<8) | 7, "L2P"));
FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(starpu_worker_get_id(), &taskTimeRecorder, (leafCells->getStartingIndex()*20*8) + 7, "L2P"));
FAssertLF(leafCells->getNumberOfCellsInBlock() == containers->getNumberOfLeavesInBlock());
KernelClass*const kernel = kernels[starpu_worker_get_id()];
......
......@@ -67,7 +67,7 @@ public:
/**
 * Begin a new recording session.
 *
 * Emits a log line through FLOG, empties the event list of every thread
 * slot that currently holds event storage, and then stores the current
 * FTic time as the starting time of the session.
 */
void start(){
    FLOG( FLog::Controller << "\tFTaskTimer starts\n" );
    for(int idxThread = 0 ; idxThread < nbThreads ; ++idxThread){
        // A slot can be null, so test the pointer before touching its
        // event list instead of dereferencing it unconditionally.
        if(threadEvents[idxThread]){
            threadEvents[idxThread]->events.clear();
        }
    }
    startingTime = FTic::GetTime();
}
......@@ -86,7 +86,9 @@ public:
FSize totalEvents = 0;
for(int idxThread = 0 ; idxThread < nbThreads ; ++idxThread){
totalEvents += threadEvents[idxThread]->events.getSize();
if(threadEvents[idxThread]){
totalEvents += threadEvents[idxThread]->events.getSize();
}
}
fprintf(foutput, "global{@duration=%e;@max threads=%d;@nb events=%lld}\n",
duration, nbThreads, totalEvents);
......@@ -95,12 +97,14 @@ public:
ensureUniqueness.reserve(totalEvents);
for(int idxThread = 0 ; idxThread < nbThreads ; ++idxThread){
for(int idxEvent = 0 ; idxEvent < threadEvents[idxThread]->events.getSize() ; ++idxEvent){
const EventDescriptor& event = threadEvents[idxThread]->events[idxEvent];
fprintf(foutput, "event{@id=%lld;@duration=%e;@start=%e;@text=%s}\n",
event.eventId, event.duration, event.start, event.text);
FAssertLF(ensureUniqueness.find(event.eventId) == ensureUniqueness.end());
ensureUniqueness.insert(event.eventId);
if(threadEvents[idxThread]){
for(int idxEvent = 0 ; idxEvent < threadEvents[idxThread]->events.getSize() ; ++idxEvent){
const EventDescriptor& event = threadEvents[idxThread]->events[idxEvent];
fprintf(foutput, "event{@id=%lld;@duration=%e;@start=%e;@text=%s}\n",
event.eventId, event.duration, event.start, event.text);
FAssertLF(ensureUniqueness.find(event.eventId) == ensureUniqueness.end());
ensureUniqueness.insert(event.eventId);
}
}
}
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment