Commit ca9728b1 authored by PRUVOST Florent

Add synchronization to the CUDA gelqt, tslqt, and tsqrt kernels to improve algorithm robustness

parent 2d5f122e
......@@ -142,6 +142,7 @@ int CUDA_zgelqt(
magma_zlarfb_gpu( MagmaRight, MagmaNoTrans, MagmaForward, MagmaRowwise,
rows, cols, ib, da_ref(i,i), ldda, dt_ref(0,i),
lddt, da_ref(i+ib,i), ldda, dwork, lddwork);
cudaThreadSynchronize();
old_i = i;
old_ib = ib;
if (i+nb >= k){
......
......@@ -115,7 +115,6 @@ int CUDA_zgemerge(
for(i=0; i<N; i++){
cola = A + i*LDA;
colb = B + i*LDB;
// cublasZcopy(i+1, cola, 1, colb, 1);
cudaMemcpyAsync(colb , cola,
(i+1)*sizeof(cuDoubleComplex),
cudaMemcpyDeviceToDevice, stream);
......@@ -124,7 +123,6 @@ int CUDA_zgemerge(
for(i=0; i<N; i++){
cola = A + i*LDA;
colb = B + i*LDB;
// cublasZcopy(M-i, cola + i, 1, colb + i, 1);
cudaMemcpyAsync(colb+i , cola+i,
(M-i)*sizeof(cuDoubleComplex),
cudaMemcpyDeviceToDevice, stream);
......
......@@ -163,6 +163,7 @@ int CUDA_ztslqt(
dwork, lddwork,
dwork + nb * lddwork, nb,
stream );
cudaThreadSynchronize();
old_i = i;
old_ib = ib;
}
......
......@@ -185,6 +185,7 @@ int CUDA_ztsqrt(
dwork, ib,
dwork + ib * cols, rows,
stream );
cudaThreadSynchronize();
old_i = i;
old_ib = ib;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment