Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
AGULLO Emmanuel
Chameleon
Commits
31e42080
Commit
31e42080
authored
Dec 04, 2016
by
Mathieu Faverge
Browse files
Make unmqr/unmlq call the larfb
parent
afa6c8f7
Changes
6
Hide whitespace changes
Inline
Side-by-side
coreblas/compute/core_ztsmqr.c
View file @
31e42080
...
...
@@ -254,10 +254,10 @@ int CORE_ztsmqr(MORSE_enum side, MORSE_enum trans,
CORE_zparfb
(
side
,
trans
,
MorseForward
,
MorseColumnwise
,
mi
,
ni
,
M2
,
N2
,
kb
,
0
,
&
A1
[
LDA1
*
jc
+
ic
]
,
LDA1
,
A1
+
LDA1
*
jc
+
ic
,
LDA1
,
A2
,
LDA2
,
&
V
[
LDV
*
i
]
,
LDV
,
&
T
[
LDT
*
i
]
,
LDT
,
V
+
LDV
*
i
,
LDV
,
T
+
LDT
*
i
,
LDT
,
WORK
,
LDWORK
);
}
return
MORSE_SUCCESS
;
...
...
cudablas/compute/CMakeLists.txt
View file @
31e42080
...
...
@@ -32,6 +32,7 @@ set(ZSRC
cuda_zhemm.c
cuda_zher2k.c
cuda_zherk.c
cuda_zlarfb.c
cuda_zparfb.c
cuda_zsymm.c
cuda_zsyr2k.c
...
...
@@ -40,6 +41,8 @@ set(ZSRC
cuda_ztrsm.c
cuda_ztsmlq.c
cuda_ztsmqr.c
cuda_zunmlqt.c
cuda_zunmqrt.c
)
if
(
CHAMELEON_USE_MAGMA
)
...
...
@@ -50,15 +53,12 @@ if( CHAMELEON_USE_MAGMA )
cuda_zgessm.c
cuda_zgetrf.c
cuda_zlauum.c
cuda_zparfb.c
cuda_zpotrf.c
cuda_zssssm.c
cuda_ztrtri.c
cuda_ztslqt.c
cuda_ztsqrt.c
cuda_ztstrf.c
cuda_zunmlqt.c
cuda_zunmqrt.c
)
endif
()
...
...
cudablas/compute/cuda_ztsmqr.c
View file @
31e42080
...
...
@@ -132,7 +132,6 @@ int CUDA_ztsmqr(
ni
=
N1
-
i
;
jc
=
i
;
}
/*
* Apply H or H' (NOTE: CORE_zparfb used to be CORE_ztsrfb)
*/
...
...
cudablas/compute/cuda_zunmlqt.c
View file @
31e42080
...
...
@@ -24,14 +24,14 @@
**/
#include "cudablas/include/cudablas.h"
#if defined(CHAMELEON_USE_MAGMA)
int
CUDA_zunmlqt
(
magma_side_t
side
,
magma_trans_t
trans
,
magma_int_t
M
,
magma_int_t
N
,
magma_int_t
K
,
magma_int_t
IB
,
const
magma
DoubleComplex
*
A
,
magma_
int
_t
LD
A
,
const
magma
DoubleComplex
*
T
,
magma_
int
_t
LD
T
,
magma
DoubleComplex
*
C
,
magma_
int
_t
LD
C
,
magmaDoubleComplex
*
WORK
,
magma_int_t
LDWORK
)
int
CUDA_zunmlqt
(
MORSE_enum
side
,
MORSE_enum
trans
,
int
M
,
int
N
,
int
K
,
int
IB
,
const
cuDoubleComplex
*
A
,
int
LDA
,
const
cu
DoubleComplex
*
T
,
int
LD
T
,
cu
DoubleComplex
*
C
,
int
LD
C
,
cu
DoubleComplex
*
WORK
,
int
LD
WORK
,
CUBLAS_STREAM_PARAM
)
{
int
i
,
kb
;
int
i1
,
i3
;
...
...
@@ -42,13 +42,13 @@ int CUDA_zunmlqt(
int
mi
=
M
;
/* Check input arguments */
if
((
side
!=
M
agma
Left
)
&&
(
side
!=
M
agma
Right
))
{
if
((
side
!=
M
orse
Left
)
&&
(
side
!=
M
orse
Right
))
{
return
-
1
;
}
/*
* NQ is the order of Q and NW is the minimum dimension of WORK
*/
if
(
side
==
M
agma
Left
)
{
if
(
side
==
M
orse
Left
)
{
nq
=
M
;
nw
=
N
;
}
...
...
@@ -57,7 +57,7 @@ int CUDA_zunmlqt(
nw
=
M
;
}
if
((
trans
!=
M
agma
NoTrans
)
&&
(
trans
!=
M
agma
ConjTrans
))
{
if
((
trans
!=
M
orse
NoTrans
)
&&
(
trans
!=
M
orse
ConjTrans
))
{
return
-
2
;
}
if
(
M
<
0
)
{
...
...
@@ -84,10 +84,10 @@ int CUDA_zunmlqt(
/* Quick return */
if
((
M
==
0
)
||
(
N
==
0
)
||
(
K
==
0
))
return
M
AGMA
_SUCCESS
;
return
M
ORSE
_SUCCESS
;
if
(((
side
==
M
agma
Left
)
&&
(
trans
==
M
agma
NoTrans
))
||
((
side
==
M
agma
Right
)
&&
(
trans
!=
M
agma
NoTrans
)))
{
if
(((
side
==
M
orse
Left
)
&&
(
trans
==
M
orse
NoTrans
))
||
((
side
==
M
orse
Right
)
&&
(
trans
!=
M
orse
NoTrans
)))
{
i1
=
0
;
i3
=
IB
;
}
...
...
@@ -106,7 +106,7 @@ int CUDA_zunmlqt(
for
(
i
=
i1
;
(
i
>-
1
)
&&
(
i
<
K
);
i
+=
i3
)
{
kb
=
min
(
IB
,
K
-
i
);
if
(
side
==
M
agma
Left
)
{
if
(
side
==
M
orse
Left
)
{
/*
* H or H' is applied to C(i:m,1:n)
*/
...
...
@@ -121,13 +121,13 @@ int CUDA_zunmlqt(
jc
=
i
;
}
magma
_zlarfb
_gpu
(
side
,
trans
,
M
agma
Forward
,
M
agma
Rowwise
,
mi
,
ni
,
kb
,
A
+
LDA
*
i
+
i
,
LDA
,
T
+
LDT
*
i
,
LDT
,
C
+
LDC
*
jc
+
ic
,
LDC
,
WORK
,
LDWORK
);
CUDA
_zlarfb
(
side
,
trans
,
M
orse
Forward
,
M
orse
Rowwise
,
mi
,
ni
,
kb
,
A
+
LDA
*
i
+
i
,
LDA
,
T
+
LDT
*
i
,
LDT
,
C
+
LDC
*
jc
+
ic
,
LDC
,
WORK
,
LDWORK
,
CUBLAS_STREAM_VALUE
);
}
return
MORSE_SUCCESS
;
}
#endif
cudablas/compute/cuda_zunmqrt.c
View file @
31e42080
...
...
@@ -24,14 +24,14 @@
**/
#include "cudablas/include/cudablas.h"
#if defined(CHAMELEON_USE_MAGMA)
int
CUDA_zunmqrt
(
magma_side_t
side
,
magma_trans_t
trans
,
magma_int_t
M
,
magma_int_t
N
,
magma_int_t
K
,
magma_int_t
IB
,
const
magma
DoubleComplex
*
A
,
magma_
int
_t
LD
A
,
const
magma
DoubleComplex
*
T
,
magma_
int
_t
LD
T
,
magma
DoubleComplex
*
C
,
magma_
int
_t
LD
C
,
magmaDoubleComplex
*
WORK
,
magma_int_t
LDWORK
)
int
CUDA_zunmqrt
(
MORSE_enum
side
,
MORSE_enum
trans
,
int
M
,
int
N
,
int
K
,
int
IB
,
const
cuDoubleComplex
*
A
,
int
LDA
,
const
cu
DoubleComplex
*
T
,
int
LD
T
,
cu
DoubleComplex
*
C
,
int
LD
C
,
cu
DoubleComplex
*
WORK
,
int
LD
WORK
,
CUBLAS_STREAM_PARAM
)
{
int
i
,
kb
;
int
i1
,
i3
;
...
...
@@ -42,13 +42,13 @@ int CUDA_zunmqrt(
int
mi
=
M
;
/* Check input arguments */
if
((
side
!=
M
agma
Left
)
&&
(
side
!=
M
agma
Right
))
{
if
((
side
!=
M
orse
Left
)
&&
(
side
!=
M
orse
Right
))
{
return
-
1
;
}
/*
* NQ is the order of Q and NW is the minimum dimension of WORK
*/
if
(
side
==
M
agma
Left
)
{
if
(
side
==
M
orse
Left
)
{
nq
=
M
;
nw
=
N
;
}
...
...
@@ -57,7 +57,7 @@ int CUDA_zunmqrt(
nw
=
M
;
}
if
((
trans
!=
M
agma
NoTrans
)
&&
(
trans
!=
M
agma
ConjTrans
))
{
if
((
trans
!=
M
orse
NoTrans
)
&&
(
trans
!=
M
orse
ConjTrans
))
{
return
-
2
;
}
if
(
M
<
0
)
{
...
...
@@ -84,10 +84,10 @@ int CUDA_zunmqrt(
/* Quick return */
if
((
M
==
0
)
||
(
N
==
0
)
||
(
K
==
0
))
return
M
AGMA
_SUCCESS
;
return
M
ORSE
_SUCCESS
;
if
(((
side
==
M
agma
Left
)
&&
(
trans
!=
M
agma
NoTrans
))
||
((
side
==
M
agma
Right
)
&&
(
trans
==
M
agma
NoTrans
)))
{
if
(((
side
==
M
orse
Left
)
&&
(
trans
!=
M
orse
NoTrans
))
||
((
side
==
M
orse
Right
)
&&
(
trans
==
M
orse
NoTrans
)))
{
i1
=
0
;
i3
=
IB
;
}
...
...
@@ -99,7 +99,7 @@ int CUDA_zunmqrt(
for
(
i
=
i1
;
(
i
>-
1
)
&&
(
i
<
K
);
i
+=
i3
)
{
kb
=
min
(
IB
,
K
-
i
);
if
(
side
==
M
agma
Left
)
{
if
(
side
==
M
orse
Left
)
{
/*
* H or H' is applied to C(i:m,1:n)
*/
...
...
@@ -114,14 +114,14 @@ int CUDA_zunmqrt(
jc
=
i
;
}
magma_zlarfb_gpu
(
side
,
trans
,
MagmaForward
,
MagmaColumnwise
,
mi
,
ni
,
kb
,
A
+
LDA
*
i
+
i
,
LDA
,
T
+
LDT
*
i
,
LDT
,
C
+
LDC
*
jc
+
ic
,
LDC
,
WORK
,
LDWORK
);
CUDA_zlarfb
(
side
,
trans
,
MorseForward
,
MorseColumnwise
,
mi
,
ni
,
kb
,
A
+
LDA
*
i
+
i
,
LDA
,
T
+
LDT
*
i
,
LDT
,
C
+
LDC
*
jc
+
ic
,
LDC
,
WORK
,
LDWORK
,
CUBLAS_STREAM_VALUE
);
}
return
MORSE_SUCCESS
;
}
#endif
cudablas/include/cudablas_z.h
View file @
31e42080
...
...
@@ -54,6 +54,7 @@ int CUDA_zgeqrt( magma_int_t m, magma_int_t n, magma_int_t nb, magmaDoubleComple
int
CUDA_zgessm
(
char
storev
,
magma_int_t
m
,
magma_int_t
n
,
magma_int_t
k
,
magma_int_t
ib
,
magma_int_t
*
ipiv
,
cuDoubleComplex
*
dL1
,
magma_int_t
lddl1
,
cuDoubleComplex
*
dL
,
magma_int_t
lddl
,
cuDoubleComplex
*
dA
,
magma_int_t
ldda
,
magma_int_t
*
info
);
int
CUDA_zgetrf_incpiv
(
char
storev
,
magma_int_t
m
,
magma_int_t
n
,
magma_int_t
ib
,
cuDoubleComplex
*
hA
,
magma_int_t
ldha
,
cuDoubleComplex
*
dA
,
magma_int_t
ldda
,
cuDoubleComplex
*
hL
,
magma_int_t
ldhl
,
cuDoubleComplex
*
dL
,
magma_int_t
lddl
,
magma_int_t
*
ipiv
,
cuDoubleComplex
*
dwork
,
magma_int_t
lddwork
,
magma_int_t
*
info
);
int
CUDA_zgetrf_nopiv
(
magma_int_t
m
,
magma_int_t
n
,
cuDoubleComplex
*
dA
,
magma_int_t
ldda
,
magma_int_t
*
info
);
int
CUDA_zlarfb
(
MORSE_enum
side
,
MORSE_enum
trans
,
MORSE_enum
direct
,
MORSE_enum
storev
,
int
M
,
int
N
,
int
K
,
const
cuDoubleComplex
*
V
,
int
LDV
,
const
cuDoubleComplex
*
T
,
int
LDT
,
cuDoubleComplex
*
C
,
int
LDC
,
cuDoubleComplex
*
WORK
,
int
LDWORK
,
CUBLAS_STREAM_PARAM
);
int
CUDA_zlauum
(
char
uplo
,
magma_int_t
n
,
cuDoubleComplex
*
dA
,
magma_int_t
ldda
,
magma_int_t
*
info
);
int
CUDA_zpotrf
(
magma_uplo_t
uplo
,
magma_int_t
n
,
magmaDoubleComplex
*
dA
,
magma_int_t
ldda
,
magma_int_t
*
info
);
int
CUDA_zssssm
(
magma_storev_t
storev
,
magma_int_t
m1
,
magma_int_t
n1
,
magma_int_t
m2
,
magma_int_t
n2
,
magma_int_t
k
,
magma_int_t
ib
,
magmaDoubleComplex
*
dA1
,
magma_int_t
ldda1
,
magmaDoubleComplex
*
dA2
,
magma_int_t
ldda2
,
magmaDoubleComplex
*
dL1
,
magma_int_t
lddl1
,
magmaDoubleComplex
*
dL2
,
magma_int_t
lddl2
,
magma_int_t
*
IPIV
,
magma_int_t
*
info
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment