Commit 75174454 authored by BLANCHARD Pierre's avatar BLANCHARD Pierre
parents ed3d9297 a6e173f6
......@@ -9,6 +9,7 @@
#include "../../Utils/FLog.hpp"
#include "../../Utils/FTic.hpp"
#include "../../Utils/FAssert.hpp"
#include "../../Utils/FEnv.hpp"
#include "FOutOfBlockInteraction.hpp"
......@@ -109,6 +110,7 @@ protected:
#endif
const bool noCommuteAtLastLevel;
const bool noCommuteBetweenLevel;
#ifdef STARPU_USE_CPU
StarPUCpuWrapperClass cpuWrapper;
......@@ -142,7 +144,8 @@ public:
FGroupTaskStarPUAlgorithm(OctreeClass*const inTree, KernelClass* inKernels)
: tree(inTree), originalCpuKernel(inKernels),
cellHandles(nullptr),
noCommuteAtLastLevel(getenv("SCALFMM_NO_COMMUTE_LAST_L2L") != NULL && getenv("SCALFMM_NO_COMMUTE_LAST_L2L")[0] != '1'?false:true),
noCommuteAtLastLevel(FEnv::GetBool("SCALFMM_NO_COMMUTE_LAST_L2L", true)),
noCommuteBetweenLevel(FEnv::GetBool("SCALFMM_NO_COMMUTE_M2L_L2L", false)),
#ifdef STARPU_USE_CPU
cpuWrapper(tree->getHeight()),
#endif
......@@ -217,6 +220,7 @@ public:
FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max CUDA " << starpu_cuda_worker_get_count() << ")\n");
#endif
FLOG(FLog::Controller << "SCALFMM_NO_COMMUTE_LAST_L2L " << noCommuteAtLastLevel << "\n");
FLOG(FLog::Controller << "SCALFMM_NO_COMMUTE_M2L_L2L " << noCommuteBetweenLevel << "\n");
buildTaskNames();
}
......@@ -1210,8 +1214,8 @@ protected:
task->dyn_handles[3] = cellHandles[idxLevel+1][idxSubGroup].down;
// put the right codelet
if(noCommuteAtLastLevel){
task->cl = (idxLevel == FAbstractAlgorithm::lowerWorkingLevel - 2 ? &l2l_cl_nocommute : &l2l_cl);
if((noCommuteAtLastLevel && (idxLevel == FAbstractAlgorithm::lowerWorkingLevel - 2)) || noCommuteBetweenLevel){
task->cl = &l2l_cl_nocommute;
}
else{
task->cl = &l2l_cl;
......@@ -1249,8 +1253,8 @@ protected:
task->dyn_handles[3] = cellHandles[idxLevel+1][idxSubGroup].down;
// put the right codelet
if(noCommuteAtLastLevel){
task->cl = (idxLevel == FAbstractAlgorithm::lowerWorkingLevel - 2 ? &l2l_cl_nocommute : &l2l_cl);
if((noCommuteAtLastLevel && (idxLevel == FAbstractAlgorithm::lowerWorkingLevel - 2)) || noCommuteBetweenLevel){
task->cl = &l2l_cl_nocommute;
}
else{
task->cl = &l2l_cl;
......
......@@ -132,6 +132,14 @@ public:
#ifdef STARPU_USE_CPU
// CPU follows the real prio
{
bool insertInnerP2PLater = false;
#ifdef STARPU_USE_CUDA
insertInnerP2PLater = capacities->supportP2P(FSTARPU_CUDA_IDX);
#endif
#ifdef STARPU_USE_OPENCL
insertInnerP2PLater = capacities->supportP2P(FSTARPU_OPENCL_IDX);
#endif
int cpuCountPrio = 0;
if( !workOnlyOnLeaves && capacities->supportP2M(FSTARPU_CPU_IDX)){
......@@ -152,7 +160,7 @@ public:
heteroprio->prio_mapping_per_arch_index[FSTARPU_CPU_IDX][cpuCountPrio++] = insertionPositionM2M;
heteroprio->buckets[insertionPositionM2M].valide_archs |= STARPU_CPU;
}
if( capacities->supportP2P(FSTARPU_CPU_IDX)){
if( capacities->supportP2P(FSTARPU_CPU_IDX) && !insertInnerP2PLater){
FLOG( FLog::Controller << "\t CPU prio P2P " << cpuCountPrio << " bucket " << insertionPositionP2P << "\n" );
heteroprio->prio_mapping_per_arch_index[FSTARPU_CPU_IDX][cpuCountPrio++] = insertionPositionP2P;
heteroprio->buckets[insertionPositionP2P].valide_archs |= STARPU_CPU;
......@@ -180,7 +188,7 @@ public:
#ifndef STARPU_USE_REDUX
if( capacities->supportP2PExtern(FSTARPU_CPU_IDX)
&& insertionPositionP2P != insertionPositionP2PExtern){
&& insertionPositionP2P != insertionPositionP2PExtern && !insertInnerP2PLater){
FLOG( FLog::Controller << "\t CPU prio P2P Extern " << cpuCountPrio << " bucket " << insertionPositionP2PExtern << "\n" );
heteroprio->prio_mapping_per_arch_index[FSTARPU_CPU_IDX][cpuCountPrio++] = insertionPositionP2PExtern;
heteroprio->buckets[insertionPositionP2PExtern].valide_archs |= STARPU_CPU;
......@@ -203,6 +211,19 @@ public:
heteroprio->prio_mapping_per_arch_index[FSTARPU_CPU_IDX][cpuCountPrio++] = insertionPositionL2P;
heteroprio->buckets[insertionPositionL2P].valide_archs |= STARPU_CPU;
}
#ifndef STARPU_USE_REDUX
if( capacities->supportP2PExtern(FSTARPU_CPU_IDX)
&& insertionPositionP2P != insertionPositionP2PExtern && insertInnerP2PLater){
FLOG( FLog::Controller << "\t CPU prio P2P Extern " << cpuCountPrio << " bucket " << insertionPositionP2PExtern << "\n" );
heteroprio->prio_mapping_per_arch_index[FSTARPU_CPU_IDX][cpuCountPrio++] = insertionPositionP2PExtern;
heteroprio->buckets[insertionPositionP2PExtern].valide_archs |= STARPU_CPU;
}
#endif
if( capacities->supportP2P(FSTARPU_CPU_IDX) && insertInnerP2PLater){
FLOG( FLog::Controller << "\t CPU prio P2P " << cpuCountPrio << " bucket " << insertionPositionP2P << "\n" );
heteroprio->prio_mapping_per_arch_index[FSTARPU_CPU_IDX][cpuCountPrio++] = insertionPositionP2P;
heteroprio->buckets[insertionPositionP2P].valide_archs |= STARPU_CPU;
}
#ifdef STARPU_USE_REDUX
if( capacities->supportP2PExtern(FSTARPU_CPU_IDX)){
FLOG( FLog::Controller << "\t CPU prio P2P Extern " << cpuCountPrio << " bucket " << insertionPositionP2PExtern << "\n" );
......@@ -287,6 +308,7 @@ public:
#endif
}
if(capacities->supportP2PExtern(FSTARPU_CUDA_IDX)
&& insertionPositionP2P != insertionPositionP2PExtern){
FLOG( FLog::Controller << "\t CUDA prio P2P ex " << cudaCountPrio << " bucket " << insertionPositionP2PExtern << "\n" );
......
......@@ -66,7 +66,7 @@ public:
if(!value){
return defaultValue;
}
return (strcmp(value,"TRUE") == 0) || (strcmp(value,"true") == 0);
return (strcmp(value,"TRUE") == 0) || (strcmp(value,"true") == 0) || (strcmp(value,"1") == 0);
}
/**
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment