Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
AGULLO Emmanuel
Chameleon
Commits
ca9728b1
Commit
ca9728b1
authored
Apr 11, 2016
by
PRUVOST Florent
Browse files
Add synchronization to the CUDA gelqt, tslqt and tsqrt kernels to improve the robustness of the algorithms
parent
2d5f122e
Changes
4
Hide whitespace changes
Inline
Side-by-side
cudablas/compute/cuda_zgelqt.c
View file @
ca9728b1
...
...
@@ -142,6 +142,7 @@ int CUDA_zgelqt(
magma_zlarfb_gpu
(
MagmaRight
,
MagmaNoTrans
,
MagmaForward
,
MagmaRowwise
,
rows
,
cols
,
ib
,
da_ref
(
i
,
i
),
ldda
,
dt_ref
(
0
,
i
),
lddt
,
da_ref
(
i
+
ib
,
i
),
ldda
,
dwork
,
lddwork
);
cudaThreadSynchronize
();
old_i
=
i
;
old_ib
=
ib
;
if
(
i
+
nb
>=
k
){
...
...
cudablas/compute/cuda_zgemerge.c
View file @
ca9728b1
...
...
@@ -115,7 +115,6 @@ int CUDA_zgemerge(
for
(
i
=
0
;
i
<
N
;
i
++
){
cola
=
A
+
i
*
LDA
;
colb
=
B
+
i
*
LDB
;
// cublasZcopy(i+1, cola, 1, colb, 1);
cudaMemcpyAsync
(
colb
,
cola
,
(
i
+
1
)
*
sizeof
(
cuDoubleComplex
),
cudaMemcpyDeviceToDevice
,
stream
);
...
...
@@ -124,7 +123,6 @@ int CUDA_zgemerge(
for
(
i
=
0
;
i
<
N
;
i
++
){
cola
=
A
+
i
*
LDA
;
colb
=
B
+
i
*
LDB
;
// cublasZcopy(M-i, cola + i, 1, colb + i, 1);
cudaMemcpyAsync
(
colb
+
i
,
cola
+
i
,
(
M
-
i
)
*
sizeof
(
cuDoubleComplex
),
cudaMemcpyDeviceToDevice
,
stream
);
...
...
cudablas/compute/cuda_ztslqt.c
View file @
ca9728b1
...
...
@@ -163,6 +163,7 @@ int CUDA_ztslqt(
dwork
,
lddwork
,
dwork
+
nb
*
lddwork
,
nb
,
stream
);
cudaThreadSynchronize
();
old_i
=
i
;
old_ib
=
ib
;
}
...
...
cudablas/compute/cuda_ztsqrt.c
View file @
ca9728b1
...
...
@@ -185,6 +185,7 @@ int CUDA_ztsqrt(
dwork
,
ib
,
dwork
+
ib
*
cols
,
rows
,
stream
);
cudaThreadSynchronize
();
old_i
=
i
;
old_ib
=
ib
;
}
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment