diff --git a/Batch/run-benchs.sh b/Batch/run-benchs.sh
index 9ea8c9bc5ef1ccd2006d6f7b31a6ddd09a0f0823..3340dd6e81fba908778e1dfd7bbbc322bb4c3075 100644
--- a/Batch/run-benchs.sh
+++ b/Batch/run-benchs.sh
@@ -54,18 +54,18 @@ function main(){
     NB_LOOPS=10
 
     # AXPY
-    "$RUN_DIR/Benchmark/axpy/axpy" --lp=$NB_LOOPS --minnbb=16 --maxnbb=256 --minbs=128 --maxbs=65536 --gputh=256 --od="$results_dir" >> "$results_dir/output_axpy.txt"
+    "$RUN_DIR/Benchmark/axpy/axpy" --lp=$NB_LOOPS --minnbb=16 --maxnbb=256 --minbs=128 --maxbs=131072 --gputh=256 --od="$results_dir" >> "$results_dir/output_axpy.txt"
    remove_core_files
 
     # Cholesky/gemm
-    "$RUN_DIR/Benchmark/cholesky_gemm/cholesky" --lp=$NB_LOOPS --minms=4096 --maxms=8192 --minbs=128 --maxbs=512 --od="$results_dir" >> "$results_dir/output_cholesky.txt"
+    "$RUN_DIR/Benchmark/cholesky_gemm/cholesky" --lp=$NB_LOOPS --minms=4096 --maxms=16384 --minbs=128 --maxbs=512 --od="$results_dir" >> "$results_dir/output_cholesky.txt"
     remove_core_files
-    "$RUN_DIR/Benchmark/cholesky_gemm/gemm" --lp=$NB_LOOPS --minms=4096 --maxms=8192 --minbs=128 --maxbs=512 --od="$results_dir" >> "$results_dir/output_gemm.txt"
+    "$RUN_DIR/Benchmark/cholesky_gemm/gemm" --lp=$NB_LOOPS --minms=4096 --maxms=16384 --minbs=128 --maxbs=512 --od="$results_dir" >> "$results_dir/output_gemm.txt"
     remove_core_files
 
     # Particles
-    "$RUN_DIR/Benchmark/particles/particles-simu" --lp=$NB_LOOPS --minp=500 --maxp=8000 --minnbgroups=128 --maxnbgroups=512 --od="$results_dir" >> "$results_dir/output_particles.txt"
+    "$RUN_DIR/Benchmark/particles/particles-simu" --lp=$NB_LOOPS --minp=500 --maxp=15000 --minnbgroups=128 --maxnbgroups=1024 --od="$results_dir" >> "$results_dir/output_particles.txt"
     remove_core_files
 
 }
diff --git a/Benchmark/axpy/axpy.cpp b/Benchmark/axpy/axpy.cpp
index 77e17c9c2e90a04a51c689f247fb7c2377953d9d..448bd33c1cc5116a96f86f6097efe713bc08ee2e 100644
--- a/Benchmark/axpy/axpy.cpp
+++ b/Benchmark/axpy/axpy.cpp
@@ -77,7 +77,7 @@ __global__ void cu_axpy(int n, NumType a, NumType *x, NumType *y, NumType *out)
 
 template <int MaxNbDevices, const bool FavorLocality>
-auto BenchmarkTest(const int NbLoops, const int nbGpu, const int nbblocks, const int blocksize, const int gpunbthreads,
+auto BenchmarkTest(const int NbLoops, const int nbCpu, const int nbGpu, const int nbblocks, const int blocksize, const int gpunbthreads,
                    const bool useMultiPrioScheduler){
     std::vector<Vector<float>> x(nbblocks, Vector<float>(blocksize, 1));
     std::vector<Vector<float>> y(nbblocks, Vector<float>(blocksize, 1));
 
@@ -92,9 +92,9 @@ auto BenchmarkTest(const int NbLoops, const int nbGpu, const int nbblocks, const
     else{
         scheduler = std::unique_ptr<SpAbstractScheduler>(new SpMultiPrioScheduler<MaxNbDevices,FavorLocality>(nbGpu*SpCudaUtils::GetDefaultNbStreams()));
     }
-    SpComputeEngine ce(SpWorkerTeamBuilder::TeamOfCpuGpuWorkers(SpUtils::DefaultNumThreads(), nbGpu), std::move(scheduler));
+    SpComputeEngine ce(SpWorkerTeamBuilder::TeamOfCpuGpuWorkers(nbCpu, nbGpu), std::move(scheduler));
 #else
-    SpComputeEngine ce(SpWorkerTeamBuilder::TeamOfCpuWorkers());
+    SpComputeEngine ce(SpWorkerTeamBuilder::TeamOfCpuWorkers(nbCpu));
 #endif
 
     std::vector<double> minMaxAvg(3);
@@ -252,7 +252,9 @@ int main(int argc, char** argv){
                                             std::make_tuple(true, false, true)};
 
     for(auto useMultiprioAndPairs: schedPairConf){
-        for(int idxGpu = 0 ; idxGpu <= nbGpus ; ++idxGpu){
+        for(int idxGpu = -1 ; idxGpu <= nbGpus ; ++idxGpu){
+            const int nbCpus = (idxGpu == -1 ? 1 : SpUtils::DefaultNumThreads());
+            const int nbGpus = (idxGpu == -1 ? 0 : idxGpu);
             const bool useMultiprio = std::get<0>(useMultiprioAndPairs);
             const bool usePrioPairs = std::get<1>(useMultiprioAndPairs);
             const bool useLocality = std::get<2>(useMultiprioAndPairs);
@@ -262,8 +264,8 @@ int main(int argc, char** argv){
                 std::cout << " - NbBlocks = " << idxNbBlocks << " BlockSize = " << idxSize << std::endl;
 
                 const auto minMaxAvg = (useLocality ?
-                                            BenchmarkTest<8,true>(NbLoops, idxGpu, idxNbBlocks, idxSize, gpunbthreads, useMultiprio)
-                                            : BenchmarkTest<8,false>(NbLoops, idxGpu, idxNbBlocks, idxSize, gpunbthreads, useMultiprio));
+                                            BenchmarkTest<8,true>(NbLoops, nbCpus, nbGpus, idxNbBlocks, idxSize, gpunbthreads, useMultiprio)
+                                            : BenchmarkTest<8,false>(NbLoops, nbCpus, nbGpus, idxNbBlocks, idxSize, gpunbthreads, useMultiprio));
                 std::cout << " - Duration = " << minMaxAvg[0] << " " << minMaxAvg[1] << " " << minMaxAvg[2] << std::endl;
                 std::cout << " - Transfers = " << minMaxAvg[3] << " " << minMaxAvg[4] << " " << minMaxAvg[5] << " " << minMaxAvg[6] << " " << minMaxAvg[7] << std::endl;
                 std::cout << " - End" << std::endl;
@@ -280,16 +282,19 @@ int main(int argc, char** argv){
         return 1;
     }
 
-    file << "NbGpu,NbBlocks,BlockSize,Multiprio,PrioPair,FavorLocality,MinDuration,MaxDuration,AvgDuration,TotalTransfer,MaxTransfer,DeviceToHostTransfer,HostToDeviceTransfer,DeviceToDeviceTransfer" << std::endl;
+    file << "NbCpus,NbGpu,NbBlocks,BlockSize,Multiprio,PrioPair,FavorLocality,MinDuration,MaxDuration,AvgDuration,TotalTransfer,MaxTransfer,DeviceToHostTransfer,HostToDeviceTransfer,DeviceToDeviceTransfer" << std::endl;
     int idxDuration = 0;
     for(auto useMultiprioAndPairs: schedPairConf){
         const bool useMultiprio = std::get<0>(useMultiprioAndPairs);
         const bool usePrioPairs = std::get<1>(useMultiprioAndPairs);
         const bool useLocality = std::get<2>(useMultiprioAndPairs);
-        for(int idxGpu = 0 ; idxGpu <= nbGpus ; ++idxGpu){
+        for(int idxGpu = -1 ; idxGpu <= nbGpus ; ++idxGpu){
             for(int idxNbBlocks = minnbblocks ; idxNbBlocks <= maxnbblocks ; idxNbBlocks *= 2){
                 for(int idxSize = minblocksize ; idxSize <= maxblocksize ; idxSize *= 2){
-                    file << idxGpu << "," << idxNbBlocks << "," << idxSize << ","
+                    const int nbCpus = (idxGpu == -1 ? 1 : SpUtils::DefaultNumThreads());
+                    const int nbGpus = (idxGpu == -1 ? 0 : idxGpu);
+
+                    file << nbCpus << "," << nbGpus << "," << idxNbBlocks << "," << idxSize << ","
                          << (useMultiprio?"TRUE":"FALSE") << ","
                          << (usePrioPairs?"TRUE":"FALSE") << ","
                          << (useLocality?"TRUE":"FALSE") << ","
diff --git a/Benchmark/cholesky_gemm/cholesky.cpp b/Benchmark/cholesky_gemm/cholesky.cpp
index 95f13d158d9a310a0b90b90ef24fc512be565e58..6e85cb174a53327b43766a09401c95f479b6ca43 100644
--- a/Benchmark/cholesky_gemm/cholesky.cpp
+++ b/Benchmark/cholesky_gemm/cholesky.cpp
@@ -58,7 +58,7 @@ thread_local HipHandles handle;
 
 template <int MaxNbDevices, const bool FavorLocality>
 auto choleskyFactorization(const int NbLoops, SpBlas::Block blocksInput[], const int inMatrixDim, const int inBlockDim,
-                           const int nbGpu, const bool useMultiPrioScheduler, const bool usePrioPairs){
+                           const int nbCpu, const int nbGpu, const bool useMultiPrioScheduler, const bool usePrioPairs){
     const int nbBlocks = (inMatrixDim+inBlockDim-1)/inBlockDim;
 
 #if defined(SPECX_COMPILE_WITH_CUDA) || defined(SPECX_COMPILE_WITH_HIP)
@@ -69,9 +69,9 @@ auto choleskyFactorization(const int NbLoops, SpBlas::Block blocksInput[], const
     else{
         scheduler = std::unique_ptr<SpAbstractScheduler>(new SpMultiPrioScheduler<MaxNbDevices,FavorLocality>(nbGpu*SpCudaUtils::GetDefaultNbStreams()));
     }
-    SpComputeEngine ce(SpWorkerTeamBuilder::TeamOfCpuGpuWorkers(SpUtils::DefaultNumThreads(), nbGpu), std::move(scheduler));
+    SpComputeEngine ce(SpWorkerTeamBuilder::TeamOfCpuGpuWorkers(nbCpu, nbGpu), std::move(scheduler));
 #else
-    SpComputeEngine ce(SpWorkerTeamBuilder::TeamOfCpuWorkers());
+    SpComputeEngine ce(SpWorkerTeamBuilder::TeamOfCpuWorkers(nbCpu));
 #endif
 
 #ifdef SPECX_COMPILE_WITH_CUDA
@@ -434,9 +434,12 @@ int main(int argc, char** argv){
                                             std::make_tuple(true, false, true)};
 
     for(auto useMultiprioAndPairs: schedPairConf){
-        for(int idxGpu = 0 ; idxGpu <= nbGpus ; ++idxGpu){
+        for(int idxGpu = -1 ; idxGpu <= nbGpus ; ++idxGpu){
             for(int BlockSize = MinBlockSize ; BlockSize <= MaxBlockSize ; BlockSize *= 2){
                 for(int MatrixSize = MinMatrixSize ; MatrixSize <= MaxMatrixSize ; MatrixSize *= 2){
+                    const int nbCpus = (idxGpu == -1 ? 1 : SpUtils::DefaultNumThreads());
+                    const int nbGpus = (idxGpu == -1 ? 0 : idxGpu);
+
                     const bool useMultiprio = std::get<0>(useMultiprioAndPairs);
                     const bool usePrioPairs = std::get<1>(useMultiprioAndPairs);
                     const bool useLocality = std::get<2>(useMultiprioAndPairs);
@@ -468,9 +471,9 @@ int main(int argc, char** argv){
                     /////////////////////////////////////////////////////////
                     const auto minMaxAvg = (useLocality ?
                                                 choleskyFactorization<8,true>(NbLoops, blocks.get(), MatrixSize, BlockSize,
-                                                                              idxGpu, useMultiprio, usePrioPairs) :
+                                                                              nbCpus, nbGpus, useMultiprio, usePrioPairs) :
                                                 choleskyFactorization<8,false>(NbLoops, blocks.get(), MatrixSize, BlockSize,
-                                                                               idxGpu, useMultiprio, usePrioPairs));
+                                                                               nbCpus, nbGpus, useMultiprio, usePrioPairs));
                     allDurations.push_back(minMaxAvg);
                     std::cout << " - Duration = " << minMaxAvg[0] << " " << minMaxAvg[1] << " " << minMaxAvg[2] << std::endl;
                     std::cout << " - Transfers = " << minMaxAvg[3] << " " << minMaxAvg[4] << " " << minMaxAvg[5] << " " << minMaxAvg[6] << " " << minMaxAvg[7] << std::endl;
@@ -500,17 +503,20 @@ int main(int argc, char** argv){
         return 1;
     }
 
-    file << "NbGpu,MatrixSize,BlockSize,Multiprio,PrioPair,FavorLocality,MinDuration,MaxDuration,AvgDuration,TotalTransfer,MaxTransfer,DeviceToHostTransfer,HostToDeviceTransfer,DeviceToDeviceTransfer" << std::endl;
+    file << "NbCpu,NbGpu,MatrixSize,BlockSize,Multiprio,PrioPair,FavorLocality,MinDuration,MaxDuration,AvgDuration,TotalTransfer,MaxTransfer,DeviceToHostTransfer,HostToDeviceTransfer,DeviceToDeviceTransfer" << std::endl;
     int idxDuration = 0;
     for(auto useMultiprioAndPairs: schedPairConf){
-        for(int idxGpu = 0 ; idxGpu <= nbGpus ; ++idxGpu){
+        for(int idxGpu = -1 ; idxGpu <= nbGpus ; ++idxGpu){
             for(int BlockSize = MinBlockSize ; BlockSize <= MaxBlockSize ; BlockSize *= 2){
                 for(int MatrixSize = MinMatrixSize ; MatrixSize <= MaxMatrixSize ; MatrixSize *= 2){
+                    const int nbCpus = (idxGpu == -1 ? 1 : SpUtils::DefaultNumThreads());
+                    const int nbGpus = (idxGpu == -1 ? 0 : idxGpu);
+
                     const bool useMultiprio = std::get<0>(useMultiprioAndPairs);
                     const bool usePrioPairs = std::get<1>(useMultiprioAndPairs);
                     const bool useLocality = std::get<2>(useMultiprioAndPairs);
 
-                    file << idxGpu << "," << MatrixSize << "," << BlockSize << ","
+                    file << nbCpus << "," << nbGpus << "," << MatrixSize << "," << BlockSize << ","
                          << (useMultiprio?"TRUE":"FALSE") << ","
                          << (usePrioPairs?"TRUE":"FALSE") << ","
                          << (useLocality?"TRUE":"FALSE") << ","
diff --git a/Benchmark/cholesky_gemm/gemm.cpp b/Benchmark/cholesky_gemm/gemm.cpp
index 0b62f36516166fc239988fbf8d18e2cd59676c34..49f7b34db0b54797240c18637808922e690385ab 100644
--- a/Benchmark/cholesky_gemm/gemm.cpp
+++ b/Benchmark/cholesky_gemm/gemm.cpp
@@ -46,7 +46,7 @@ thread_local hipblasHandle_t handle;
 
 template <int MaxNbDevices, const bool FavorLocality>
 auto gemm(const int NbLoops, SpBlas::Block blocksC[], const SpBlas::Block blocksA[], const SpBlas::Block blocksB[], const int inMatrixDim, const int inBlockDim,
-          const int nbGpu, const bool useMultiPrioScheduler){
+          const int nbCpus, const int nbGpu, const bool useMultiPrioScheduler){
     const int nbBlocks = (inMatrixDim+inBlockDim-1)/inBlockDim;
     const bool exportTrace = false;
 
@@ -58,9 +58,9 @@ auto gemm(const int NbLoops, SpBlas::Block blocksC[], const SpBlas::Block blocks
     else{
         scheduler = std::unique_ptr<SpAbstractScheduler>(new SpMultiPrioScheduler<MaxNbDevices,FavorLocality>(nbGpu*SpCudaUtils::GetDefaultNbStreams()));
     }
-    SpComputeEngine ce(SpWorkerTeamBuilder::TeamOfCpuGpuWorkers(SpUtils::DefaultNumThreads(), nbGpu), std::move(scheduler));
+    SpComputeEngine ce(SpWorkerTeamBuilder::TeamOfCpuGpuWorkers(nbCpus, nbGpu), std::move(scheduler));
 #else
-    SpComputeEngine ce(SpWorkerTeamBuilder::TeamOfCpuWorkers());
+    SpComputeEngine ce(SpWorkerTeamBuilder::TeamOfCpuWorkers(nbCpus));
 #endif
 
 #ifdef SPECX_COMPILE_WITH_CUDA
@@ -268,7 +268,10 @@ int main(int argc, char** argv){
     for(auto useMultiprioAndPairs: schedPairConf){
         for(int BlockSize = MinBlockSize ; BlockSize <= MaxBlockSize ; BlockSize *= 2){
             for(int MatrixSize = MinMatrixSize ; MatrixSize <= MaxMatrixSize ; MatrixSize *= 2){
-                for(int idxGpu = 0 ; idxGpu <= nbGpus ; ++idxGpu){
+                for(int idxGpu = -1 ; idxGpu <= nbGpus ; ++idxGpu){
+                    const int nbCpus = (idxGpu == -1 ? 1 : SpUtils::DefaultNumThreads());
+                    const int nbGpus = (idxGpu == -1 ? 0 : idxGpu);
+
                     const bool useMultiprio = std::get<0>(useMultiprioAndPairs);
                     const bool useLocality = std::get<1>(useMultiprioAndPairs);
 
@@ -301,9 +304,9 @@ int main(int argc, char** argv){
                     auto blocksC = SpBlas::matrixToBlock(matrixC.get(), MatrixSize, BlockSize);
                     const auto minMaxAvg = (useLocality ?
                                                 gemm<8,true>(NbLoops, blocksC.get(), blocksA.get(), blocksB.get(),
-                                                             MatrixSize, BlockSize, idxGpu, useMultiprio):
+                                                             MatrixSize, BlockSize, nbCpus, nbGpus, useMultiprio):
                                                 gemm<8,false>(NbLoops, blocksC.get(), blocksA.get(), blocksB.get(),
-                                                              MatrixSize, BlockSize, idxGpu, useMultiprio));
+                                                              MatrixSize, BlockSize, nbCpus, nbGpus, useMultiprio));
                     allDurations.push_back(minMaxAvg);
                     std::cout << " - Duration = " << minMaxAvg[0] << " " << minMaxAvg[1] << " " << minMaxAvg[2] << std::endl;
                     std::cout << " - Transfers = " << minMaxAvg[3] << " " << minMaxAvg[4] << " " << minMaxAvg[5] << " " << minMaxAvg[6] << " " << minMaxAvg[7] << std::endl;
@@ -333,16 +336,19 @@ int main(int argc, char** argv){
         return 1;
     }
 
-    file << "NbGpu,MatrixSize,BlockSize,Multiprio,PrioPair,FavorLocality,MinDuration,MaxDuration,AvgDuration,TotalTransfer,MaxTransfer,DeviceToHostTransfer,HostToDeviceTransfer,DeviceToDeviceTransfer" << std::endl;
+    file << "NbCpu,NbGpu,MatrixSize,BlockSize,Multiprio,PrioPair,FavorLocality,MinDuration,MaxDuration,AvgDuration,TotalTransfer,MaxTransfer,DeviceToHostTransfer,HostToDeviceTransfer,DeviceToDeviceTransfer" << std::endl;
     int idxDuration = 0;
     for(auto useMultiprioAndPairs: schedPairConf){
         for(int BlockSize = MinBlockSize ; BlockSize <= MaxBlockSize ; BlockSize *= 2){
             for(int MatrixSize = MinMatrixSize ; MatrixSize <= MaxMatrixSize ; MatrixSize *= 2){
-                for(int idxGpu = 0 ; idxGpu <= nbGpus ; ++idxGpu){
+                for(int idxGpu = -1 ; idxGpu <= nbGpus ; ++idxGpu){
+                    const int nbCpus = (idxGpu == -1 ? 1 : SpUtils::DefaultNumThreads());
+                    const int nbGpus = (idxGpu == -1 ? 0 : idxGpu);
+
                     const bool useMultiprio = std::get<0>(useMultiprioAndPairs);
                     const bool useLocality = std::get<1>(useMultiprioAndPairs);
 
-                    file << idxGpu << "," << MatrixSize << "," << BlockSize << ","
+                    file << nbCpus << "," << nbGpus << "," << MatrixSize << "," << BlockSize << ","
                          << (useMultiprio?"TRUE":"FALSE") << ","
                          << "FALSE" << ","
                          << (useLocality?"TRUE":"FALSE") << ","
diff --git a/Benchmark/particles/particles-simu.cpp b/Benchmark/particles/particles-simu.cpp
index 54cc36c941d4509ee965ac5d89f05767787ab208..4360854c7d0380c5830ec74a388dcb57141fee50 100644
--- a/Benchmark/particles/particles-simu.cpp
+++ b/Benchmark/particles/particles-simu.cpp
@@ -704,7 +704,7 @@ auto GetPriority(const bool usePrioPairs, const int maxInteractions, const int m
 
 template <int MaxNbDevices, const bool FavorLocality>
 auto BenchCore( const int NbLoops, const int MinPartsPerGroup, const int MaxPartsPerGroup,
-                const int NbGroups, const int nbGpu, const bool useMultiPrioScheduler,
+                const int NbGroups, const int nbCpu, const int nbGpu, const bool useMultiPrioScheduler,
                 const bool usePrioPairs, const TuneResult& inKernelConfig,
                 const int maxInteractions, const int minInteractions){
 
@@ -727,9 +727,9 @@ auto BenchCore( const int NbLoops, const int MinPartsPerGroup, const int MaxPart
     else{
         scheduler = std::unique_ptr<SpAbstractScheduler>(new SpMultiPrioScheduler<MaxNbDevices,FavorLocality>(nbGpu*SpCudaUtils::GetDefaultNbStreams()));
     }
-    SpComputeEngine ce(SpWorkerTeamBuilder::TeamOfCpuGpuWorkers(SpUtils::DefaultNumThreads(), nbGpu), std::move(scheduler));
+    SpComputeEngine ce(SpWorkerTeamBuilder::TeamOfCpuGpuWorkers(nbCpu, nbGpu), std::move(scheduler));
 #else
-    SpComputeEngine ce(SpWorkerTeamBuilder::TeamOfCpuWorkers());
+    SpComputeEngine ce(SpWorkerTeamBuilder::TeamOfCpuWorkers(nbCpu));
 #endif
 
     std::vector<double> minMaxAvg(3);
@@ -917,8 +917,10 @@ void BenchmarkTest(int argc, char** argv, const TuneResult& inKernelConfig){
                                             std::make_tuple(true, false, true)};
 
     for(auto useMultiprioAndPairs: schedPairConf){
-        for(int idxGpu = 0 ; idxGpu <= nbGpus ; ++idxGpu){
+        for(int idxGpu = -1 ; idxGpu <= nbGpus ; ++idxGpu){
             for(int idxBlock = MinNbGroups ; idxBlock <= MaxNbGroups ; idxBlock *= 2){
+                const int nbCpus = (idxGpu == -1 ? 1 : SpUtils::DefaultNumThreads());
+                const int nbGpus = (idxGpu == -1 ? 0 : idxGpu);
                 const bool useMultiprio = std::get<0>(useMultiprioAndPairs);
                 const bool usePrioPairs = std::get<1>(useMultiprioAndPairs);
                 const bool useLocality = std::get<2>(useMultiprioAndPairs);
@@ -929,10 +931,10 @@ void BenchmarkTest(int argc, char** argv, const TuneResult& inKernelConfig){
                           << " Favor Loc = " << useLocality << std::endl;
                 const auto minMaxAvg = (useLocality ?
                                             BenchCore<8,true>(NbLoops, MinPartsPerGroup,
-                                                MaxPartsPerGroup, idxBlock, idxGpu, useMultiprio, usePrioPairs, inKernelConfig,
+                                                MaxPartsPerGroup, idxBlock, nbCpus, nbGpus, useMultiprio, usePrioPairs, inKernelConfig,
                                                 maxInteractions, minInteractions):
                                             BenchCore<8,false>(NbLoops, MinPartsPerGroup,
-                                                MaxPartsPerGroup, idxBlock, idxGpu, useMultiprio, usePrioPairs, inKernelConfig,
+                                                MaxPartsPerGroup, idxBlock, nbCpus, nbGpus, useMultiprio, usePrioPairs, inKernelConfig,
                                                 maxInteractions, minInteractions));
                 allDurations.push_back(minMaxAvg);
                 std::cout << " - Min = " << minMaxAvg[0] << " Max = " << minMaxAvg[1] << " Avg = " << minMaxAvg[2] << std::endl;
@@ -947,11 +949,13 @@ void BenchmarkTest(int argc, char** argv, const TuneResult& inKernelConfig){
         return;
     }
 
-    file << "NbGpu,BlockSize,Multiprio,Multiprio,PrioPair,MinDuration,MaxDuration,AvgDuration,TotalTransfer,MaxTransfer,DeviceToHostTransfer,HostToDeviceTransfer,DeviceToDeviceTransfer" << std::endl;
+    file << "NbCpu,NbGpu,BlockSize,Multiprio,PrioPair,FavorLocality,MinDuration,MaxDuration,AvgDuration,TotalTransfer,MaxTransfer,DeviceToHostTransfer,HostToDeviceTransfer,DeviceToDeviceTransfer" << std::endl;
     int idxDuration = 0;
     for(auto useMultiprioAndPairs: schedPairConf){
-        for(int idxGpu = 0 ; idxGpu <= nbGpus ; ++idxGpu){
+        for(int idxGpu = -1 ; idxGpu <= nbGpus ; ++idxGpu){
             for(int idxBlock = MinNbGroups ; idxBlock <= MaxNbGroups ; idxBlock *= 2){
+                const int nbCpus = (idxGpu == -1 ? 1 : SpUtils::DefaultNumThreads());
+                const int nbGpus = (idxGpu == -1 ? 0 : idxGpu);
                 const bool useMultiprio = std::get<0>(useMultiprioAndPairs);
                 const bool usePrioPairs = std::get<1>(useMultiprioAndPairs);
                 const bool useLocality = std::get<2>(useMultiprioAndPairs);
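
Note on the pattern applied in every file above: the GPU sweep now starts at idxGpu = -1, a sentinel meaning "sequential baseline" (one CPU worker, zero GPUs), while idxGpu >= 0 keeps the full CPU thread count and enables idxGpu GPU workers. A minimal, self-contained sketch of that mapping follows; the main() wrapper and the hard-coded counts are hypothetical stand-ins for the detected GPU count and SpUtils::DefaultNumThreads():

#include <iostream>

int main(){
    const int nbGpus = 2;            // hypothetical: number of GPUs available on the node
    const int defaultNbThreads = 8;  // hypothetical stand-in for SpUtils::DefaultNumThreads()

    for(int idxGpu = -1 ; idxGpu <= nbGpus ; ++idxGpu){
        // idxGpu == -1 encodes the sequential baseline: 1 CPU worker, 0 GPUs.
        // Any other idxGpu keeps all CPU threads and uses idxGpu GPU workers.
        const int nbCpus = (idxGpu == -1 ? 1 : defaultNbThreads);
        const int nbGpusUsed = (idxGpu == -1 ? 0 : idxGpu);
        std::cout << "Team: " << nbCpus << " CPU worker(s), "
                  << nbGpusUsed << " GPU worker(s)" << std::endl;
        // The benchmarks then pass these two values to
        // SpWorkerTeamBuilder::TeamOfCpuGpuWorkers(nbCpus, nbGpusUsed), as in the hunks above.
    }
    return 0;
}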