diff --git a/Benchmark/axpy/axpy.cpp b/Benchmark/axpy/axpy.cpp
index 448bd33c1cc5116a96f86f6097efe713bc08ee2e..58f1db0e2f2258d1a9adf09d51d12d847a24de43 100644
--- a/Benchmark/axpy/axpy.cpp
+++ b/Benchmark/axpy/axpy.cpp
@@ -253,8 +253,8 @@ int main(int argc, char** argv){
 
     for(auto useMultiprioAndPairs: schedPairConf){
         for(int idxGpu = -1 ; idxGpu <= nbGpus ; ++idxGpu){
-                const int nbCpus = (idxGpu == -1 ? 1 : SpUtils::DefaultNumThreads());
             const int nbGpus = (idxGpu == -1 ? 0 : idxGpu);
+            const int nbCpus = std::max(1 , (idxGpu == -1 ? 1 : SpUtils::DefaultNumThreads())-nbGpus);
             const bool useMultiprio = std::get<0>(useMultiprioAndPairs);
             const bool usePrioPairs = std::get<1>(useMultiprioAndPairs);
             const bool useLocality = std::get<2>(useMultiprioAndPairs);
@@ -291,8 +291,8 @@ int main(int argc, char** argv){
         for(int idxGpu = -1 ; idxGpu <= nbGpus ; ++idxGpu){
             for(int idxNbBlocks = minnbblocks ; idxNbBlocks <= maxnbblocks ; idxNbBlocks *= 2){
                 for(int idxSize = minblocksize ; idxSize <= maxblocksize ; idxSize *= 2){
-                const int nbCpus = (idxGpu == -1 ? 1 : SpUtils::DefaultNumThreads());
                     const int nbGpus = (idxGpu == -1 ? 0 : idxGpu);
+                    const int nbCpus = std::max(1 , (idxGpu == -1 ? 1 : SpUtils::DefaultNumThreads())-nbGpus);
 
                     file << nbCpus << "," << nbGpus << "," << idxNbBlocks << "," << idxSize << "," 
                     << (useMultiprio?"TRUE":"FALSE") << ","
diff --git a/Benchmark/cholesky_gemm/cholesky-mpi.cpp b/Benchmark/cholesky_gemm/cholesky-mpi.cpp
index 203986780f29affb472033ea5017d621dd41b77c..2236bfa43ab3cd11b18ec70cfb2bace903af3603 100644
--- a/Benchmark/cholesky_gemm/cholesky-mpi.cpp
+++ b/Benchmark/cholesky_gemm/cholesky-mpi.cpp
@@ -71,7 +71,7 @@ auto choleskyFactorization(const int NbLoops, SpBlas::Block blocksInput[], const
     else{
         scheduler = std::unique_ptr<SpAbstractScheduler>(new SpMultiPrioScheduler<MaxNbDevices,FavorLocality>(nbGpu*SpCudaUtils::GetDefaultNbStreams()));
     }
-    SpComputeEngine ce(SpWorkerTeamBuilder::TeamOfCpuGpuWorkers(SpUtils::DefaultNumThreads(), nbGpu), std::move(scheduler));
+    SpComputeEngine ce(SpWorkerTeamBuilder::TeamOfCpuGpuWorkers(std::max(1 , SpUtils::DefaultNumThreads()-nbGpu), nbGpu), std::move(scheduler));
 #else
     SpComputeEngine ce(SpWorkerTeamBuilder::TeamOfCpuWorkers());
 #endif
diff --git a/Benchmark/cholesky_gemm/cholesky.cpp b/Benchmark/cholesky_gemm/cholesky.cpp
index 6e85cb174a53327b43766a09401c95f479b6ca43..e514b44ae641cfe38cb81ef9c4505a4acbefe306 100644
--- a/Benchmark/cholesky_gemm/cholesky.cpp
+++ b/Benchmark/cholesky_gemm/cholesky.cpp
@@ -437,8 +437,8 @@ int main(int argc, char** argv){
         for(int idxGpu = -1 ; idxGpu <= nbGpus ; ++idxGpu){
             for(int BlockSize = MinBlockSize ; BlockSize <= MaxBlockSize ; BlockSize *= 2){
                 for(int MatrixSize = MinMatrixSize ; MatrixSize <= MaxMatrixSize ; MatrixSize *= 2){
-                const int nbCpus = (idxGpu == -1 ? 1 : SpUtils::DefaultNumThreads());
                     const int nbGpus = (idxGpu == -1 ? 0 : idxGpu);
+                    const int nbCpus = std::max(1 , (idxGpu == -1 ? 1 : SpUtils::DefaultNumThreads())-nbGpus);
 
                     const bool useMultiprio = std::get<0>(useMultiprioAndPairs);
                     const bool usePrioPairs = std::get<1>(useMultiprioAndPairs);
@@ -509,8 +509,8 @@ int main(int argc, char** argv){
         for(int idxGpu = -1 ; idxGpu <= nbGpus ; ++idxGpu){
             for(int BlockSize = MinBlockSize ; BlockSize <= MaxBlockSize ; BlockSize *= 2){
                 for(int MatrixSize = MinMatrixSize ; MatrixSize <= MaxMatrixSize ; MatrixSize *= 2){
-                const int nbCpus = (idxGpu == -1 ? 1 : SpUtils::DefaultNumThreads());
                     const int nbGpus = (idxGpu == -1 ? 0 : idxGpu);
+                    const int nbCpus = std::max(1 , (idxGpu == -1 ? 1 : SpUtils::DefaultNumThreads())-nbGpus);
 
                     const bool useMultiprio = std::get<0>(useMultiprioAndPairs);
                     const bool usePrioPairs = std::get<1>(useMultiprioAndPairs);
diff --git a/Benchmark/cholesky_gemm/gemm-mpi.cpp b/Benchmark/cholesky_gemm/gemm-mpi.cpp
index d2e01e82b04b012688257ed6c6da964095e9dd3e..8632cc3df2c1b9cf2c379d0a3e4e29f7145572b7 100644
--- a/Benchmark/cholesky_gemm/gemm-mpi.cpp
+++ b/Benchmark/cholesky_gemm/gemm-mpi.cpp
@@ -67,7 +67,7 @@ auto gemm(const int NbLoops, SpBlas::Block blocksC[], const SpBlas::Block blocks
     else{
         scheduler = std::unique_ptr<SpAbstractScheduler>(new SpMultiPrioScheduler<MaxNbDevices,FavorLocality>(nbGpu*SpCudaUtils::GetDefaultNbStreams()));
     }
-    SpComputeEngine ce(SpWorkerTeamBuilder::TeamOfCpuGpuWorkers(SpUtils::DefaultNumThreads(), nbGpu), std::move(scheduler));
+    SpComputeEngine ce(SpWorkerTeamBuilder::TeamOfCpuGpuWorkers(std::max(1, SpUtils::DefaultNumThreads()-nbGpu), nbGpu), std::move(scheduler));
 #else
     SpComputeEngine ce(SpWorkerTeamBuilder::TeamOfCpuWorkers());
 #endif
diff --git a/Benchmark/cholesky_gemm/gemm.cpp b/Benchmark/cholesky_gemm/gemm.cpp
index 49f7b34db0b54797240c18637808922e690385ab..27e9c07b2a559816f7f3406060b56cefbc213202 100644
--- a/Benchmark/cholesky_gemm/gemm.cpp
+++ b/Benchmark/cholesky_gemm/gemm.cpp
@@ -269,8 +269,8 @@ int main(int argc, char** argv){
         for(int BlockSize = MinBlockSize ; BlockSize <= MaxBlockSize ; BlockSize *= 2){
             for(int MatrixSize = MinMatrixSize ; MatrixSize <= MaxMatrixSize ; MatrixSize *= 2){
                 for(int idxGpu = -1 ; idxGpu <= nbGpus ; ++idxGpu){
-                const int nbCpus = (idxGpu == -1 ? 1 : SpUtils::DefaultNumThreads());
                     const int nbGpus = (idxGpu == -1 ? 0 : idxGpu);
+                    const int nbCpus = std::max(1 , (idxGpu == -1 ? 1 : SpUtils::DefaultNumThreads())-nbGpus);
 
                     const bool useMultiprio = std::get<0>(useMultiprioAndPairs);
                     const bool useLocality = std::get<1>(useMultiprioAndPairs);
@@ -342,8 +342,8 @@ int main(int argc, char** argv){
         for(int BlockSize = MinBlockSize ; BlockSize <= MaxBlockSize ; BlockSize *= 2){
             for(int MatrixSize = MinMatrixSize ; MatrixSize <= MaxMatrixSize ; MatrixSize *= 2){
                 for(int idxGpu = -1 ; idxGpu <= nbGpus ; ++idxGpu){
-                const int nbCpus = (idxGpu == -1 ? 1 : SpUtils::DefaultNumThreads());
                     const int nbGpus = (idxGpu == -1 ? 0 : idxGpu);
+                    const int nbCpus = std::max(1 , (idxGpu == -1 ? 1 : SpUtils::DefaultNumThreads())-nbGpus);
 
                     const bool useMultiprio = std::get<0>(useMultiprioAndPairs);
                     const bool useLocality = std::get<1>(useMultiprioAndPairs);
diff --git a/Benchmark/particles/particles-simu.cpp b/Benchmark/particles/particles-simu.cpp
index 94adf1d79692458b03f25d82b917d5422da23b6d..dafb4ecaa6ca4280b10219904bcf5415d3acf46a 100644
--- a/Benchmark/particles/particles-simu.cpp
+++ b/Benchmark/particles/particles-simu.cpp
@@ -919,8 +919,8 @@ void BenchmarkTest(int argc, char** argv, const TuneResult& inKernelConfig){
     for(auto useMultiprioAndPairs: schedPairConf){
         for(int idxGpu = -1 ; idxGpu <= nbGpus ; ++idxGpu){
             for(int idxBlock = MinNbGroups ; idxBlock <= MaxNbGroups ; idxBlock *= 2){
-                const int nbCpus = (idxGpu == -1 ? 1 : SpUtils::DefaultNumThreads());
                 const int nbGpus = (idxGpu == -1 ? 0 : idxGpu);
+                const int nbCpus = std::max(1 , (idxGpu == -1 ? 1 : SpUtils::DefaultNumThreads())-nbGpus);
                 const bool useMultiprio = std::get<0>(useMultiprioAndPairs);
                 const bool usePrioPairs = std::get<1>(useMultiprioAndPairs);
                 const bool useLocality = std::get<2>(useMultiprioAndPairs);
@@ -954,8 +954,8 @@ void BenchmarkTest(int argc, char** argv, const TuneResult& inKernelConfig){
     for(auto useMultiprioAndPairs: schedPairConf){
         for(int idxGpu = -1 ; idxGpu <= nbGpus ; ++idxGpu){
             for(int idxBlock = MinNbGroups ; idxBlock <= MaxNbGroups ; idxBlock *= 2){
-                const int nbCpus = (idxGpu == -1 ? 1 : SpUtils::DefaultNumThreads());
                 const int nbGpus = (idxGpu == -1 ? 0 : idxGpu);
+                const int nbCpus = std::max(1 , (idxGpu == -1 ? 1 : SpUtils::DefaultNumThreads())-nbGpus);
                 const bool useMultiprio = std::get<0>(useMultiprioAndPairs);
                 const bool usePrioPairs = std::get<1>(useMultiprioAndPairs);
                 const bool useLocality = std::get<2>(useMultiprioAndPairs);
diff --git a/Src/Compute/SpWorkerTeamBuilder.hpp b/Src/Compute/SpWorkerTeamBuilder.hpp
index 9c02b7d60401a189cf06bd7ed60fd5417159de1f..833e618675b2ab97a714e61d91e677b2a0dc3d67 100644
--- a/Src/Compute/SpWorkerTeamBuilder.hpp
+++ b/Src/Compute/SpWorkerTeamBuilder.hpp
@@ -42,9 +42,11 @@ static small_vector<std::unique_ptr<SpWorker>> TeamOfCudaWorkers(const int nbWor
     return res;
 }
 
-static small_vector<std::unique_ptr<SpWorker>> TeamOfCpuCudaWorkers(const int nbCpuWorkers = SpUtils::DefaultNumThreads(),
+static small_vector<std::unique_ptr<SpWorker>> TeamOfCpuCudaWorkers(const int nbCpuWorkersInit = -1,
                                              int nbCudaWorkers = SpCudaUtils::GetNbDevices(),
                                              const int nbWorkerPerCudas = SpCudaUtils::GetDefaultNbStreams()) {
+    const int nbCpuWorkers = (nbCpuWorkersInit != -1 ? nbCpuWorkersInit : std::max(1, SpUtils::DefaultNumThreads()-nbCudaWorkers));
+
     if(SpCudaUtils::GetNbDevices() < nbCudaWorkers){
         std::cout << "[SPECX] The number of devices asked ("
                   << nbCudaWorkers << ") is above the real number of devices ("
@@ -76,7 +78,7 @@ static auto TeamOfCpuGpuWorkers(Args&& ... args) {
 }
 #endif
 #ifdef SPECX_COMPILE_WITH_HIP
-static small_vector<std::unique_ptr<SpWorker>> TeamOfHipWorkers(const int nbWorkerPerHips = SpHipUtils::GetDefaultNbStreams(),
+static small_vector<std::unique_ptr<SpWorker>> TeamOfHipWorkers(const int nbWorkerPerHips = SpHipUtils::GetDefaultNbStreams(),
                                              int nbHipWorkers = SpHipUtils::GetNbDevices()) {
     if(SpHipUtils::GetNbDevices() < nbHipWorkers){
         std::cout << "[SPECX] The number of devices asked ("
@@ -99,9 +101,10 @@ static small_vector<std::unique_ptr<SpWorker>> TeamOfHipWorkers(const int nbWork
     return res;
 }
 
-static small_vector<std::unique_ptr<SpWorker>> TeamOfCpuHipWorkers(const int nbCpuWorkers = SpUtils::DefaultNumThreads(),
+static small_vector<std::unique_ptr<SpWorker>> TeamOfCpuHipWorkers(const int nbCpuWorkersInit = -1,
                                              const int nbWorkerPerHips = SpHipUtils::GetDefaultNbStreams(),
                                              int nbHipWorkers = SpHipUtils::GetNbDevices()) {
+    const int nbCpuWorkers = (nbCpuWorkersInit != -1 ? nbCpuWorkersInit : std::max(1, SpUtils::DefaultNumThreads()-nbHipWorkers)); // -1 means auto: default thread count minus requested HIP devices, at least 1
     if(SpHipUtils::GetNbDevices() < nbHipWorkers){
         std::cout << "[SPECX] The number of devices asked ("
                   << nbHipWorkers << ") is above the real number of devices ("