Mentions légales du service

Skip to content
Snippets Groups Projects
Commit ca9728b1 authored by PRUVOST Florent
Browse files

Add synchronization to the CUDA gelqt, tslqt and tsqrt kernels to improve algorithm robustness

parent 2d5f122e
No related branches found
No related tags found
No related merge requests found
......@@ -142,6 +142,7 @@ int CUDA_zgelqt(
magma_zlarfb_gpu( MagmaRight, MagmaNoTrans, MagmaForward, MagmaRowwise,
rows, cols, ib, da_ref(i,i), ldda, dt_ref(0,i),
lddt, da_ref(i+ib,i), ldda, dwork, lddwork);
cudaThreadSynchronize();
old_i = i;
old_ib = ib;
if (i+nb >= k){
......
......@@ -115,7 +115,6 @@ int CUDA_zgemerge(
for(i=0; i<N; i++){
cola = A + i*LDA;
colb = B + i*LDB;
// cublasZcopy(i+1, cola, 1, colb, 1);
cudaMemcpyAsync(colb , cola,
(i+1)*sizeof(cuDoubleComplex),
cudaMemcpyDeviceToDevice, stream);
......@@ -124,7 +123,6 @@ int CUDA_zgemerge(
for(i=0; i<N; i++){
cola = A + i*LDA;
colb = B + i*LDB;
// cublasZcopy(M-i, cola + i, 1, colb + i, 1);
cudaMemcpyAsync(colb+i , cola+i,
(M-i)*sizeof(cuDoubleComplex),
cudaMemcpyDeviceToDevice, stream);
......
......@@ -163,6 +163,7 @@ int CUDA_ztslqt(
dwork, lddwork,
dwork + nb * lddwork, nb,
stream );
cudaThreadSynchronize();
old_i = i;
old_ib = ib;
}
......
......@@ -185,6 +185,7 @@ int CUDA_ztsqrt(
dwork, ib,
dwork + ib * cols, rows,
stream );
cudaThreadSynchronize();
old_i = i;
old_ib = ib;
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment