Mise à jour terminée. Pour connaître les apports de la version 13.8.4 par rapport à notre ancienne version vous pouvez lire les "Release Notes" suivantes :
https://about.gitlab.com/releases/2021/02/11/security-release-gitlab-13-8-4-released/
https://about.gitlab.com/releases/2021/02/05/gitlab-13-8-3-released/

Commit fdaf5b56 authored by BRAMAS Berenger

manage the case when we use P2P only in the block version

parent 0f084a95
...@@ -237,6 +237,7 @@ public: ...@@ -237,6 +237,7 @@ public:
void execute(const unsigned operationsToProceed = FFmmNearAndFarFields){ void execute(const unsigned operationsToProceed = FFmmNearAndFarFields){
FLOG( FLog::Controller << "\tStart FGroupTaskStarPUAlgorithm\n" ); FLOG( FLog::Controller << "\tStart FGroupTaskStarPUAlgorithm\n" );
const bool directOnly = (tree->getHeight() <= 2);
#pragma omp parallel #pragma omp parallel
#pragma omp single #pragma omp single
...@@ -246,17 +247,17 @@ public: ...@@ -246,17 +247,17 @@ public:
starpu_resume(); starpu_resume();
if(operationsToProceed & FFmmP2M) bottomPass(); if(operationsToProceed & FFmmP2M && !directOnly) bottomPass();
if(operationsToProceed & FFmmM2M) upwardPass(); if(operationsToProceed & FFmmM2M && !directOnly) upwardPass();
if(operationsToProceed & FFmmM2L) transferPass(); if(operationsToProceed & FFmmM2L && !directOnly) transferPass();
if(operationsToProceed & FFmmL2L) downardPass(); if(operationsToProceed & FFmmL2L && !directOnly) downardPass();
if( operationsToProceed & FFmmP2P ) directPass(); if( operationsToProceed & FFmmP2P ) directPass();
if( operationsToProceed & FFmmL2P ) mergePass(); if( operationsToProceed & FFmmL2P && !directOnly) mergePass();
starpu_task_wait_for_all(); starpu_task_wait_for_all();
starpu_pause(); starpu_pause();
......
...@@ -256,6 +256,7 @@ public: ...@@ -256,6 +256,7 @@ public:
void execute(const unsigned operationsToProceed = FFmmNearAndFarFields){ void execute(const unsigned operationsToProceed = FFmmNearAndFarFields){
FLOG( FLog::Controller << "\tStart FGroupTaskStarPUMpiAlgorithm\n" ); FLOG( FLog::Controller << "\tStart FGroupTaskStarPUMpiAlgorithm\n" );
const bool directOnly = (tree->getHeight() <= 2);
#pragma omp parallel #pragma omp parallel
#pragma omp single #pragma omp single
...@@ -271,20 +272,20 @@ public: ...@@ -271,20 +272,20 @@ public:
if( operationsToProceed & FFmmP2P ) insertParticlesSend(); if( operationsToProceed & FFmmP2P ) insertParticlesSend();
if(operationsToProceed & FFmmP2M) bottomPass(); if(operationsToProceed & FFmmP2M && !directOnly) bottomPass();
if(operationsToProceed & FFmmM2M) upwardPass(); if(operationsToProceed & FFmmM2M && !directOnly) upwardPass();
if(operationsToProceed & FFmmM2L) insertCellsSend(); if(operationsToProceed & FFmmM2L && !directOnly) insertCellsSend();
if(operationsToProceed & FFmmM2L) transferPass(); if(operationsToProceed & FFmmM2L && !directOnly) transferPass();
if(operationsToProceed & FFmmM2L) transferPassMpi(); if(operationsToProceed & FFmmM2L && !directOnly) transferPassMpi();
if(operationsToProceed & FFmmL2L) downardPass(); if(operationsToProceed & FFmmL2L && !directOnly) downardPass();
if( operationsToProceed & FFmmP2P ) directPass(); if( operationsToProceed & FFmmP2P ) directPass();
if( operationsToProceed & FFmmP2P ) directPassMpi(); if( operationsToProceed & FFmmP2P ) directPassMpi();
if( operationsToProceed & FFmmL2P ) mergePass(); if( operationsToProceed & FFmmL2P && !directOnly) mergePass();
starpu_task_wait_for_all(); starpu_task_wait_for_all();
starpu_pause(); starpu_pause();
...@@ -918,26 +919,29 @@ protected: ...@@ -918,26 +919,29 @@ protected:
void postRecvAllocatedBlocks(){ void postRecvAllocatedBlocks(){
std::vector<MpiDependency> toRecv; std::vector<MpiDependency> toRecv;
FAssertLF(tree->getHeight() == int(remoteCellGroups.size())); FAssertLF(tree->getHeight() == int(remoteCellGroups.size()));
for(int idxLevel = 0 ; idxLevel < tree->getHeight() ; ++idxLevel){ const bool directOnly = (tree->getHeight() <= 2);
for(int idxHandle = 0 ; idxHandle < int(remoteCellGroups[idxLevel].size()) ; ++idxHandle){ if(!directOnly){
if(remoteCellGroups[idxLevel][idxHandle].ptrSymb){ for(int idxLevel = 0 ; idxLevel < tree->getHeight() ; ++idxLevel){
FAssertLF(remoteCellGroups[idxLevel][idxHandle].ptrUp); for(int idxHandle = 0 ; idxHandle < int(remoteCellGroups[idxLevel].size()) ; ++idxHandle){
FLOG(FLog::Controller << "[SMpi] " << idxLevel << " Post a recv during M2L for Idx " << processesBlockInfos[idxLevel][idxHandle].firstIndex << if(remoteCellGroups[idxLevel][idxHandle].ptrSymb){
" and dest is " << processesBlockInfos[idxLevel][idxHandle].owner << " tag " << getTag(idxLevel,processesBlockInfos[idxLevel][idxHandle].firstIndex, 0) << "\n"); FAssertLF(remoteCellGroups[idxLevel][idxHandle].ptrUp);
FLOG(FLog::Controller << "[SMpi] " << idxLevel << " Post a recv during M2L for Idx " << processesBlockInfos[idxLevel][idxHandle].firstIndex << FLOG(FLog::Controller << "[SMpi] " << idxLevel << " Post a recv during M2L for Idx " << processesBlockInfos[idxLevel][idxHandle].firstIndex <<
" and dest is " << processesBlockInfos[idxLevel][idxHandle].owner << " tag " << getTag(idxLevel,processesBlockInfos[idxLevel][idxHandle].firstIndex, 1) << "\n"); " and dest is " << processesBlockInfos[idxLevel][idxHandle].owner << " tag " << getTag(idxLevel,processesBlockInfos[idxLevel][idxHandle].firstIndex, 0) << "\n");
FLOG(FLog::Controller << "[SMpi] " << idxLevel << " Post a recv during M2L for Idx " << processesBlockInfos[idxLevel][idxHandle].firstIndex <<
starpu_mpi_irecv_detached( remoteCellGroups[idxLevel][idxHandle].handleSymb, " and dest is " << processesBlockInfos[idxLevel][idxHandle].owner << " tag " << getTag(idxLevel,processesBlockInfos[idxLevel][idxHandle].firstIndex, 1) << "\n");
processesBlockInfos[idxLevel][idxHandle].owner,
getTag(idxLevel,processesBlockInfos[idxLevel][idxHandle].firstIndex, 0), starpu_mpi_irecv_detached( remoteCellGroups[idxLevel][idxHandle].handleSymb,
comm.getComm(), 0, 0 ); processesBlockInfos[idxLevel][idxHandle].owner,
starpu_mpi_irecv_detached( remoteCellGroups[idxLevel][idxHandle].handleUp, getTag(idxLevel,processesBlockInfos[idxLevel][idxHandle].firstIndex, 0),
processesBlockInfos[idxLevel][idxHandle].owner, comm.getComm(), 0, 0 );
getTag(idxLevel,processesBlockInfos[idxLevel][idxHandle].firstIndex, 1), starpu_mpi_irecv_detached( remoteCellGroups[idxLevel][idxHandle].handleUp,
comm.getComm(), 0, 0 ); processesBlockInfos[idxLevel][idxHandle].owner,
getTag(idxLevel,processesBlockInfos[idxLevel][idxHandle].firstIndex, 1),
toRecv.push_back({processesBlockInfos[idxLevel][idxHandle].owner, comm.getComm(), 0, 0 );
comm.processId(), idxLevel, idxHandle});
toRecv.push_back({processesBlockInfos[idxLevel][idxHandle].owner,
comm.processId(), idxLevel, idxHandle});
}
} }
} }
} }
......
...@@ -59,40 +59,65 @@ public: ...@@ -59,40 +59,65 @@ public:
treeHeight = inTreeHeight; treeHeight = inTreeHeight;
int incPrio = 0; if(inTreeHeight > 2){
int incPrio = 0;
prioP2MSend = incPrio++; prioP2MSend = incPrio++;
prioP2M = incPrio++; prioP2M = incPrio++;
prioM2MSend = incPrio++; prioM2MSend = incPrio++;
prioM2M = incPrio++; prioM2M = incPrio++;
prioM2L = incPrio; prioM2L = incPrio;
prioM2LExtern = incPrio; prioM2LExtern = incPrio;
prioM2LMpi = incPrio++; prioM2LMpi = incPrio++;
prioL2L = incPrio++; prioL2L = incPrio++;
incPrio += (treeHeight-2)-1 // M2L is done treeHeight-2 times incPrio += (treeHeight-2)-1 // M2L is done treeHeight-2 times
+(treeHeight-3)-1; // L2L is done treeHeight-3 times +(treeHeight-3)-1; // L2L is done treeHeight-3 times
prioP2P = incPrio; prioP2P = incPrio;
prioP2PExtern = incPrio; prioP2PExtern = incPrio;
prioP2PMpi = incPrio++; prioP2PMpi = incPrio++;
prioL2P = incPrio++; prioL2P = incPrio++;
assert(incPrio == 6 + (treeHeight-2) + (treeHeight-3)); assert(incPrio == 6 + (treeHeight-2) + (treeHeight-3));
}
else{
int incPrio = 0;
prioP2MSend = -1;
prioP2M = -1;
prioM2MSend = -1;
prioM2M = -1;
prioM2L = -1;
prioM2LExtern = -1;
prioM2LMpi = -1;
prioL2L = -1;
prioP2P = incPrio;
prioP2PExtern = incPrio;
prioP2PMpi = incPrio++;
prioL2P = -1;
assert(incPrio == 1);
}
} }
void initSchedulerCallback(unsigned /*sched_ctx_id*/, void initSchedulerCallback(unsigned /*sched_ctx_id*/,
struct _starpu_heteroprio_center_policy_heteroprio *heteroprio){ struct _starpu_heteroprio_center_policy_heteroprio *heteroprio){
const bool workOnlyOnLeaves = (treeHeight <= 2);
#ifdef STARPU_USE_CPU #ifdef STARPU_USE_CPU
// CPU follows the real prio // CPU follows the real prio
{ {
int cpuCountPrio = 0; int cpuCountPrio = 0;
//prioP2MSend = 0; //prioP2MSend = 0;
//prioP2M = prioP2MSend+1; //prioP2M = prioP2MSend+1;
if(capacities->supportP2M(FSTARPU_CPU_IDX)){ if( !workOnlyOnLeaves && capacities->supportP2M(FSTARPU_CPU_IDX)){
heteroprio->prio_mapping_per_arch_index[FSTARPU_CPU_IDX][cpuCountPrio++] = prioP2MSend; heteroprio->prio_mapping_per_arch_index[FSTARPU_CPU_IDX][cpuCountPrio++] = prioP2MSend;
heteroprio->buckets[prioP2MSend].valide_archs |= STARPU_CPU; heteroprio->buckets[prioP2MSend].valide_archs |= STARPU_CPU;
...@@ -101,8 +126,8 @@ public: ...@@ -101,8 +126,8 @@ public:
} }
//prioM2MSend = prioP2M+1; //prioM2MSend = prioP2M+1;
//prioM2M = prioM2MSend+1; //prioM2M = prioM2MSend+1;
assert(cpuCountPrio == prioM2MSend); // True if CPU support all TODO //assert(cpuCountPrio == prioM2MSend); // True if CPU support all TODO
if(capacities->supportM2M(FSTARPU_CPU_IDX)){ if(!workOnlyOnLeaves && capacities->supportM2M(FSTARPU_CPU_IDX)){
heteroprio->prio_mapping_per_arch_index[FSTARPU_CPU_IDX][cpuCountPrio++] = prioM2MSend; heteroprio->prio_mapping_per_arch_index[FSTARPU_CPU_IDX][cpuCountPrio++] = prioM2MSend;
heteroprio->buckets[prioM2MSend].valide_archs |= STARPU_CPU; heteroprio->buckets[prioM2MSend].valide_archs |= STARPU_CPU;
...@@ -114,7 +139,7 @@ public: ...@@ -114,7 +139,7 @@ public:
// prioM2LExtern = prioM2L; // prioM2LExtern = prioM2L;
// prioM2LMpi = prioM2L; // prioM2LMpi = prioM2L;
// prioL2L = prioM2L+1; // prioL2L = prioM2L+1;
assert(cpuCountPrio == prioM2L); // True if CPU support all TODO // assert(cpuCountPrio == prioM2L); // True if CPU support all TODO
for(int idxLevel = 2 ; idxLevel < treeHeight ; ++idxLevel){ for(int idxLevel = 2 ; idxLevel < treeHeight ; ++idxLevel){
if(capacities->supportM2L(FSTARPU_CPU_IDX)){ if(capacities->supportM2L(FSTARPU_CPU_IDX)){
const int prioM2LAtLevel = getPrioM2L(idxLevel); const int prioM2LAtLevel = getPrioM2L(idxLevel);
...@@ -127,19 +152,19 @@ public: ...@@ -127,19 +152,19 @@ public:
heteroprio->buckets[prioL2LAtLevel].valide_archs |= STARPU_CPU; heteroprio->buckets[prioL2LAtLevel].valide_archs |= STARPU_CPU;
} }
} }
assert(cpuCountPrio == prioP2P); // True if CPU support all TODO // assert(cpuCountPrio == prioP2P); // True if CPU support all TODO
//prioP2P = prioL2L + (treeHeight-3)*2+1 +1; //prioP2P = prioL2L + (treeHeight-3)*2+1 +1;
//prioP2PExtern = prioP2P; //prioP2PExtern = prioP2P;
//prioP2PMpi = prioP2P; //prioP2PMpi = prioP2P;
if(capacities->supportP2P(FSTARPU_CPU_IDX)){ if( capacities->supportP2P(FSTARPU_CPU_IDX)){
heteroprio->prio_mapping_per_arch_index[FSTARPU_CPU_IDX][cpuCountPrio++] = prioP2P; heteroprio->prio_mapping_per_arch_index[FSTARPU_CPU_IDX][cpuCountPrio++] = prioP2P;
heteroprio->buckets[prioP2P].valide_archs |= STARPU_CPU; heteroprio->buckets[prioP2P].valide_archs |= STARPU_CPU;
} }
assert(cpuCountPrio == prioL2P); // True if CPU support all TODO //assert(cpuCountPrio == prioL2P); // True if CPU support all TODO
//prioL2P = prioP2PMpi+1; //prioL2P = prioP2PMpi+1;
if(capacities->supportL2P(FSTARPU_CPU_IDX)){ if( !workOnlyOnLeaves && capacities->supportL2P(FSTARPU_CPU_IDX)){
heteroprio->prio_mapping_per_arch_index[FSTARPU_CPU_IDX][cpuCountPrio++] = prioL2P; heteroprio->prio_mapping_per_arch_index[FSTARPU_CPU_IDX][cpuCountPrio++] = prioL2P;
heteroprio->buckets[prioL2P].valide_archs |= STARPU_CPU; heteroprio->buckets[prioL2P].valide_archs |= STARPU_CPU;
} }
...@@ -180,7 +205,7 @@ public: ...@@ -180,7 +205,7 @@ public:
//prioP2MSend = 0; //prioP2MSend = 0;
//prioP2M = prioP2MSend+1; //prioP2M = prioP2MSend+1;
if(capacities->supportP2M(FSTARPU_OPENCL_IDX)){ if( !workOnlyOnLeaves && capacities->supportP2M(FSTARPU_OPENCL_IDX)){
heteroprio->prio_mapping_per_arch_index[FSTARPU_OPENCL_IDX][openclCountPrio++] = prioP2MSend; heteroprio->prio_mapping_per_arch_index[FSTARPU_OPENCL_IDX][openclCountPrio++] = prioP2MSend;
heteroprio->buckets[prioP2MSend].valide_archs |= STARPU_OPENCL; heteroprio->buckets[prioP2MSend].valide_archs |= STARPU_OPENCL;
...@@ -190,7 +215,7 @@ public: ...@@ -190,7 +215,7 @@ public:
//prioM2MSend = prioP2M+1; //prioM2MSend = prioP2M+1;
//prioM2M = prioM2MSend+1; //prioM2M = prioM2MSend+1;
if(capacities->supportM2M(FSTARPU_OPENCL_IDX)){ if( !workOnlyOnLeaves && capacities->supportM2M(FSTARPU_OPENCL_IDX)){
heteroprio->prio_mapping_per_arch_index[FSTARPU_OPENCL_IDX][openclCountPrio++] = prioM2MSend; heteroprio->prio_mapping_per_arch_index[FSTARPU_OPENCL_IDX][openclCountPrio++] = prioM2MSend;
heteroprio->buckets[prioM2MSend].valide_archs |= STARPU_OPENCL; heteroprio->buckets[prioM2MSend].valide_archs |= STARPU_OPENCL;
...@@ -208,7 +233,7 @@ public: ...@@ -208,7 +233,7 @@ public:
} }
//prioL2P = prioP2PMpi+1; //prioL2P = prioP2PMpi+1;
if(capacities->supportL2P(FSTARPU_OPENCL_IDX)){ if( !workOnlyOnLeaves && capacities->supportL2P(FSTARPU_OPENCL_IDX)){
heteroprio->prio_mapping_per_arch_index[FSTARPU_OPENCL_IDX][openclCountPrio++] = prioL2P; heteroprio->prio_mapping_per_arch_index[FSTARPU_OPENCL_IDX][openclCountPrio++] = prioL2P;
heteroprio->buckets[prioL2P].valide_archs |= STARPU_OPENCL; heteroprio->buckets[prioL2P].valide_archs |= STARPU_OPENCL;
} }
...@@ -249,7 +274,7 @@ public: ...@@ -249,7 +274,7 @@ public:
//prioP2MSend = 0; //prioP2MSend = 0;
//prioP2M = prioP2MSend+1; //prioP2M = prioP2MSend+1;
if(capacities->supportP2M(FSTARPU_CUDA_IDX)){ if( !workOnlyOnLeaves && capacities->supportP2M(FSTARPU_CUDA_IDX)){
heteroprio->prio_mapping_per_arch_index[FSTARPU_CUDA_IDX][openclCountPrio++] = prioP2MSend; heteroprio->prio_mapping_per_arch_index[FSTARPU_CUDA_IDX][openclCountPrio++] = prioP2MSend;
heteroprio->buckets[prioP2MSend].valide_archs |= STARPU_CUDA; heteroprio->buckets[prioP2MSend].valide_archs |= STARPU_CUDA;
...@@ -259,7 +284,7 @@ public: ...@@ -259,7 +284,7 @@ public:
//prioM2MSend = prioP2M+1; //prioM2MSend = prioP2M+1;
//prioM2M = prioM2MSend+1; //prioM2M = prioM2MSend+1;
if(capacities->supportM2M(FSTARPU_CUDA_IDX)){ if( !workOnlyOnLeaves && capacities->supportM2M(FSTARPU_CUDA_IDX)){
heteroprio->prio_mapping_per_arch_index[FSTARPU_CUDA_IDX][openclCountPrio++] = prioM2MSend; heteroprio->prio_mapping_per_arch_index[FSTARPU_CUDA_IDX][openclCountPrio++] = prioM2MSend;
heteroprio->buckets[prioM2MSend].valide_archs |= STARPU_CUDA; heteroprio->buckets[prioM2MSend].valide_archs |= STARPU_CUDA;
...@@ -277,7 +302,7 @@ public: ...@@ -277,7 +302,7 @@ public:
} }
//prioL2P = prioP2PMpi+1; //prioL2P = prioP2PMpi+1;
if(capacities->supportL2P(FSTARPU_CUDA_IDX)){ if( !workOnlyOnLeaves && capacities->supportL2P(FSTARPU_CUDA_IDX)){
heteroprio->prio_mapping_per_arch_index[FSTARPU_CUDA_IDX][openclCountPrio++] = prioL2P; heteroprio->prio_mapping_per_arch_index[FSTARPU_CUDA_IDX][openclCountPrio++] = prioL2P;
heteroprio->buckets[prioL2P].valide_archs |= STARPU_CUDA; heteroprio->buckets[prioL2P].valide_archs |= STARPU_CUDA;
} }
......
...@@ -48,7 +48,8 @@ int main(int argc, char* argv[]){ ...@@ -48,7 +48,8 @@ int main(int argc, char* argv[]){
"The size of the block of the blocked tree" "The size of the block of the blocked tree"
}; };
FHelpDescribeAndExit(argc, argv, "Test the blocked tree by counting the particles.", FHelpDescribeAndExit(argc, argv, "Test the blocked tree by counting the particles.",
FParameterDefinitions::OctreeHeight, FParameterDefinitions::NbParticles, LocalOptionBlocSize); FParameterDefinitions::OctreeHeight, FParameterDefinitions::NbParticles,
FParameterDefinitions::OctreeSubHeight, LocalOptionBlocSize);
typedef double FReal; typedef double FReal;
...@@ -101,7 +102,8 @@ int main(int argc, char* argv[]){ ...@@ -101,7 +102,8 @@ int main(int argc, char* argv[]){
FAssertLF(loader.isOpen()); FAssertLF(loader.isOpen());
// Usual octree // Usual octree
OctreeClass tree(NbLevels, 2, loader.getBoxWidth(), loader.getCenterOfBox()); OctreeClass tree(NbLevels, FParameters::getValue(argc,argv,FParameterDefinitions::OctreeSubHeight.options, 2),
loader.getBoxWidth(), loader.getCenterOfBox());
FTestParticleContainer<FReal> allParticles; FTestParticleContainer<FReal> allParticles;
for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment