diff --git a/Benchmark/cholesky_gemm/gemm.cpp b/Benchmark/cholesky_gemm/gemm.cpp
index 2e5163bb675c06fc1938c389398369fac5564ae6..0098fc30624fa229786f49e48c7b75923c24d974 100644
--- a/Benchmark/cholesky_gemm/gemm.cpp
+++ b/Benchmark/cholesky_gemm/gemm.cpp
@@ -116,18 +116,17 @@ auto gemm(const int NbLoops, SpBlas::Block blocksC[], const SpBlas::Block blocks
                                     inBlockDim, inBlockDim, inBlockDim, &alphaBeta, (const double*)paramA.getRawPtr(), inBlockDim,
                                     (const double*)paramB.getRawPtr(), inBlockDim,
                                     &alphaBeta, (double*)paramC.getRawPtr(), inBlockDim ) );
-CUDA_ASSERT(cudaStreamSynchronize(SpCudaUtils::GetCurrentStream()));
                         })
                 #endif
                 #ifdef SPECX_COMPILE_WITH_HIP
                         , SpHip([inBlockDim](SpDeviceDataView<SpBlas::Block> paramC, const SpDeviceDataView<const SpBlas::Block> paramA,
                                             const SpDeviceDataView<const SpBlas::Block> paramB) {
                                 // paramA.getRawPtr(), paramA.getRawSize()
-                                //const double alphaBeta = 1.0;
-                                //HIPBLAS_ASSERT( hipblasDgemm( handle, HIPBLAS_OP_N, HIPBLAS_OP_N,
-                                //        inBlockDim, inBlockDim, inBlockDim, &alphaBeta, (const double*)paramA.getRawPtr(), inBlockDim,
-                                //        (const double*)paramB.getRawPtr(), inBlockDim,
-                                //        &alphaBeta, (double*)paramC.getRawPtr(), inBlockDim ) );
+                                const double alphaBeta = 1.0;
+                                HIPBLAS_ASSERT( hipblasDgemm( handle, HIPBLAS_OP_N, HIPBLAS_OP_N,
+                                        inBlockDim, inBlockDim, inBlockDim, &alphaBeta, (const double*)paramA.getRawPtr(), inBlockDim,
+                                        (const double*)paramB.getRawPtr(), inBlockDim,
+                                        &alphaBeta, (double*)paramC.getRawPtr(), inBlockDim ) );
                             })
                 #endif
                     ).setTaskName(std::string("GEMM -- (")+std::to_string(i)+","+std::to_string(j)+")");
@@ -261,7 +260,7 @@ int main(int argc, char** argv){
     for(auto useMultiprioAndPairs: schedPairConf){
         for(int BlockSize = MinBlockSize ; BlockSize <= MaxBlockSize ; BlockSize *= 2){
             for(int MatrixSize = MinMatrixSize ; MatrixSize <= MaxMatrixSize ; MatrixSize *= 2){
-                for(int idxGpu = nbGpus ; idxGpu <= nbGpus ; ++idxGpu){
+                for(int idxGpu = 0 ; idxGpu <= nbGpus ; ++idxGpu){
                     const bool useMultiprio = std::get<0>(useMultiprioAndPairs);
                     const bool useLocality = std::get<1>(useMultiprioAndPairs);