Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 31e42080 authored by Mathieu Faverge
Browse files

Make unmqr/unmlq call the larfb

parent afa6c8f7
No related branches found
No related tags found
No related merge requests found
...@@ -254,10 +254,10 @@ int CORE_ztsmqr(MORSE_enum side, MORSE_enum trans, ...@@ -254,10 +254,10 @@ int CORE_ztsmqr(MORSE_enum side, MORSE_enum trans,
CORE_zparfb( CORE_zparfb(
side, trans, MorseForward, MorseColumnwise, side, trans, MorseForward, MorseColumnwise,
mi, ni, M2, N2, kb, 0, mi, ni, M2, N2, kb, 0,
&A1[LDA1*jc+ic], LDA1, A1 + LDA1*jc+ic, LDA1,
A2, LDA2, A2, LDA2,
&V[LDV*i], LDV, V + LDV*i, LDV,
&T[LDT*i], LDT, T + LDT*i, LDT,
WORK, LDWORK); WORK, LDWORK);
} }
return MORSE_SUCCESS; return MORSE_SUCCESS;
......
...@@ -32,6 +32,7 @@ set(ZSRC ...@@ -32,6 +32,7 @@ set(ZSRC
cuda_zhemm.c cuda_zhemm.c
cuda_zher2k.c cuda_zher2k.c
cuda_zherk.c cuda_zherk.c
cuda_zlarfb.c
cuda_zparfb.c cuda_zparfb.c
cuda_zsymm.c cuda_zsymm.c
cuda_zsyr2k.c cuda_zsyr2k.c
...@@ -40,6 +41,8 @@ set(ZSRC ...@@ -40,6 +41,8 @@ set(ZSRC
cuda_ztrsm.c cuda_ztrsm.c
cuda_ztsmlq.c cuda_ztsmlq.c
cuda_ztsmqr.c cuda_ztsmqr.c
cuda_zunmlqt.c
cuda_zunmqrt.c
) )
if( CHAMELEON_USE_MAGMA ) if( CHAMELEON_USE_MAGMA )
...@@ -50,15 +53,12 @@ if( CHAMELEON_USE_MAGMA ) ...@@ -50,15 +53,12 @@ if( CHAMELEON_USE_MAGMA )
cuda_zgessm.c cuda_zgessm.c
cuda_zgetrf.c cuda_zgetrf.c
cuda_zlauum.c cuda_zlauum.c
cuda_zparfb.c
cuda_zpotrf.c cuda_zpotrf.c
cuda_zssssm.c cuda_zssssm.c
cuda_ztrtri.c cuda_ztrtri.c
cuda_ztslqt.c cuda_ztslqt.c
cuda_ztsqrt.c cuda_ztsqrt.c
cuda_ztstrf.c cuda_ztstrf.c
cuda_zunmlqt.c
cuda_zunmqrt.c
) )
endif() endif()
......
...@@ -132,7 +132,6 @@ int CUDA_ztsmqr( ...@@ -132,7 +132,6 @@ int CUDA_ztsmqr(
ni = N1 - i; ni = N1 - i;
jc = i; jc = i;
} }
/* /*
* Apply H or H' (NOTE: CORE_zparfb used to be CORE_ztsrfb) * Apply H or H' (NOTE: CORE_zparfb used to be CORE_ztsrfb)
*/ */
......
...@@ -24,14 +24,14 @@ ...@@ -24,14 +24,14 @@
**/ **/
#include "cudablas/include/cudablas.h" #include "cudablas/include/cudablas.h"
#if defined(CHAMELEON_USE_MAGMA) int
int CUDA_zunmlqt( CUDA_zunmlqt(MORSE_enum side, MORSE_enum trans,
magma_side_t side, magma_trans_t trans, int M, int N, int K, int IB,
magma_int_t M, magma_int_t N, magma_int_t K, magma_int_t IB, const cuDoubleComplex *A, int LDA,
const magmaDoubleComplex *A, magma_int_t LDA, const cuDoubleComplex *T, int LDT,
const magmaDoubleComplex *T, magma_int_t LDT, cuDoubleComplex *C, int LDC,
magmaDoubleComplex *C, magma_int_t LDC, cuDoubleComplex *WORK, int LDWORK,
magmaDoubleComplex *WORK, magma_int_t LDWORK ) CUBLAS_STREAM_PARAM )
{ {
int i, kb; int i, kb;
int i1, i3; int i1, i3;
...@@ -42,13 +42,13 @@ int CUDA_zunmlqt( ...@@ -42,13 +42,13 @@ int CUDA_zunmlqt(
int mi = M; int mi = M;
/* Check input arguments */ /* Check input arguments */
if ((side != MagmaLeft) && (side != MagmaRight)) { if ((side != MorseLeft) && (side != MorseRight)) {
return -1; return -1;
} }
/* /*
* NQ is the order of Q and NW is the minimum dimension of WORK * NQ is the order of Q and NW is the minimum dimension of WORK
*/ */
if (side == MagmaLeft) { if (side == MorseLeft) {
nq = M; nq = M;
nw = N; nw = N;
} }
...@@ -57,7 +57,7 @@ int CUDA_zunmlqt( ...@@ -57,7 +57,7 @@ int CUDA_zunmlqt(
nw = M; nw = M;
} }
if ((trans != MagmaNoTrans) && (trans != MagmaConjTrans)) { if ((trans != MorseNoTrans) && (trans != MorseConjTrans)) {
return -2; return -2;
} }
if (M < 0) { if (M < 0) {
...@@ -84,10 +84,10 @@ int CUDA_zunmlqt( ...@@ -84,10 +84,10 @@ int CUDA_zunmlqt(
/* Quick return */ /* Quick return */
if ((M == 0) || (N == 0) || (K == 0)) if ((M == 0) || (N == 0) || (K == 0))
return MAGMA_SUCCESS; return MORSE_SUCCESS;
if (((side == MagmaLeft) && (trans == MagmaNoTrans)) if (((side == MorseLeft) && (trans == MorseNoTrans))
|| ((side == MagmaRight) && (trans != MagmaNoTrans))) { || ((side == MorseRight) && (trans != MorseNoTrans))) {
i1 = 0; i1 = 0;
i3 = IB; i3 = IB;
} }
...@@ -106,7 +106,7 @@ int CUDA_zunmlqt( ...@@ -106,7 +106,7 @@ int CUDA_zunmlqt(
for(i = i1; (i >- 1) && (i < K); i+=i3 ) { for(i = i1; (i >- 1) && (i < K); i+=i3 ) {
kb = min(IB, K-i); kb = min(IB, K-i);
if (side == MagmaLeft) { if (side == MorseLeft) {
/* /*
* H or H' is applied to C(i:m,1:n) * H or H' is applied to C(i:m,1:n)
*/ */
...@@ -121,13 +121,13 @@ int CUDA_zunmlqt( ...@@ -121,13 +121,13 @@ int CUDA_zunmlqt(
jc = i; jc = i;
} }
magma_zlarfb_gpu( side, trans, MagmaForward, MagmaRowwise, CUDA_zlarfb( side, trans, MorseForward, MorseRowwise,
mi, ni, kb, mi, ni, kb,
A + LDA * i + i, LDA, A + LDA * i + i, LDA,
T + LDT * i, LDT, T + LDT * i, LDT,
C + LDC * jc + ic, LDC, C + LDC * jc + ic, LDC,
WORK, LDWORK); WORK, LDWORK, CUBLAS_STREAM_VALUE);
} }
return MORSE_SUCCESS; return MORSE_SUCCESS;
} }
#endif
...@@ -24,14 +24,14 @@ ...@@ -24,14 +24,14 @@
**/ **/
#include "cudablas/include/cudablas.h" #include "cudablas/include/cudablas.h"
#if defined(CHAMELEON_USE_MAGMA) int
int CUDA_zunmqrt( CUDA_zunmqrt(MORSE_enum side, MORSE_enum trans,
magma_side_t side, magma_trans_t trans, int M, int N, int K, int IB,
magma_int_t M, magma_int_t N, magma_int_t K, magma_int_t IB, const cuDoubleComplex *A, int LDA,
const magmaDoubleComplex *A, magma_int_t LDA, const cuDoubleComplex *T, int LDT,
const magmaDoubleComplex *T, magma_int_t LDT, cuDoubleComplex *C, int LDC,
magmaDoubleComplex *C, magma_int_t LDC, cuDoubleComplex *WORK, int LDWORK,
magmaDoubleComplex *WORK, magma_int_t LDWORK ) CUBLAS_STREAM_PARAM )
{ {
int i, kb; int i, kb;
int i1, i3; int i1, i3;
...@@ -42,13 +42,13 @@ int CUDA_zunmqrt( ...@@ -42,13 +42,13 @@ int CUDA_zunmqrt(
int mi = M; int mi = M;
/* Check input arguments */ /* Check input arguments */
if ((side != MagmaLeft) && (side != MagmaRight)) { if ((side != MorseLeft) && (side != MorseRight)) {
return -1; return -1;
} }
/* /*
* NQ is the order of Q and NW is the minimum dimension of WORK * NQ is the order of Q and NW is the minimum dimension of WORK
*/ */
if (side == MagmaLeft) { if (side == MorseLeft) {
nq = M; nq = M;
nw = N; nw = N;
} }
...@@ -57,7 +57,7 @@ int CUDA_zunmqrt( ...@@ -57,7 +57,7 @@ int CUDA_zunmqrt(
nw = M; nw = M;
} }
if ((trans != MagmaNoTrans) && (trans != MagmaConjTrans)) { if ((trans != MorseNoTrans) && (trans != MorseConjTrans)) {
return -2; return -2;
} }
if (M < 0) { if (M < 0) {
...@@ -84,10 +84,10 @@ int CUDA_zunmqrt( ...@@ -84,10 +84,10 @@ int CUDA_zunmqrt(
/* Quick return */ /* Quick return */
if ((M == 0) || (N == 0) || (K == 0)) if ((M == 0) || (N == 0) || (K == 0))
return MAGMA_SUCCESS; return MORSE_SUCCESS;
if (((side == MagmaLeft) && (trans != MagmaNoTrans)) if (((side == MorseLeft) && (trans != MorseNoTrans))
|| ((side == MagmaRight) && (trans == MagmaNoTrans))) { || ((side == MorseRight) && (trans == MorseNoTrans))) {
i1 = 0; i1 = 0;
i3 = IB; i3 = IB;
} }
...@@ -99,7 +99,7 @@ int CUDA_zunmqrt( ...@@ -99,7 +99,7 @@ int CUDA_zunmqrt(
for(i = i1; (i >- 1) && (i < K); i+=i3 ) { for(i = i1; (i >- 1) && (i < K); i+=i3 ) {
kb = min(IB, K-i); kb = min(IB, K-i);
if (side == MagmaLeft) { if (side == MorseLeft) {
/* /*
* H or H' is applied to C(i:m,1:n) * H or H' is applied to C(i:m,1:n)
*/ */
...@@ -114,14 +114,14 @@ int CUDA_zunmqrt( ...@@ -114,14 +114,14 @@ int CUDA_zunmqrt(
jc = i; jc = i;
} }
magma_zlarfb_gpu( side, trans, MagmaForward, MagmaColumnwise, CUDA_zlarfb( side, trans, MorseForward, MorseColumnwise,
mi, ni, kb, mi, ni, kb,
A + LDA * i + i, LDA, A + LDA * i + i, LDA,
T + LDT * i, LDT, T + LDT * i, LDT,
C + LDC * jc + ic, LDC, C + LDC * jc + ic, LDC,
WORK, LDWORK); WORK, LDWORK,
CUBLAS_STREAM_VALUE );
} }
return MORSE_SUCCESS; return MORSE_SUCCESS;
} }
#endif
...@@ -54,6 +54,7 @@ int CUDA_zgeqrt( magma_int_t m, magma_int_t n, magma_int_t nb, magmaDoubleComple ...@@ -54,6 +54,7 @@ int CUDA_zgeqrt( magma_int_t m, magma_int_t n, magma_int_t nb, magmaDoubleComple
int CUDA_zgessm( char storev, magma_int_t m, magma_int_t n, magma_int_t k, magma_int_t ib, magma_int_t *ipiv, cuDoubleComplex *dL1, magma_int_t lddl1, cuDoubleComplex *dL, magma_int_t lddl, cuDoubleComplex *dA, magma_int_t ldda, magma_int_t *info); int CUDA_zgessm( char storev, magma_int_t m, magma_int_t n, magma_int_t k, magma_int_t ib, magma_int_t *ipiv, cuDoubleComplex *dL1, magma_int_t lddl1, cuDoubleComplex *dL, magma_int_t lddl, cuDoubleComplex *dA, magma_int_t ldda, magma_int_t *info);
int CUDA_zgetrf_incpiv( char storev, magma_int_t m, magma_int_t n, magma_int_t ib, cuDoubleComplex *hA, magma_int_t ldha, cuDoubleComplex *dA, magma_int_t ldda, cuDoubleComplex *hL, magma_int_t ldhl, cuDoubleComplex *dL, magma_int_t lddl, magma_int_t *ipiv, cuDoubleComplex *dwork, magma_int_t lddwork, magma_int_t *info); int CUDA_zgetrf_incpiv( char storev, magma_int_t m, magma_int_t n, magma_int_t ib, cuDoubleComplex *hA, magma_int_t ldha, cuDoubleComplex *dA, magma_int_t ldda, cuDoubleComplex *hL, magma_int_t ldhl, cuDoubleComplex *dL, magma_int_t lddl, magma_int_t *ipiv, cuDoubleComplex *dwork, magma_int_t lddwork, magma_int_t *info);
int CUDA_zgetrf_nopiv( magma_int_t m, magma_int_t n, cuDoubleComplex *dA, magma_int_t ldda, magma_int_t *info); int CUDA_zgetrf_nopiv( magma_int_t m, magma_int_t n, cuDoubleComplex *dA, magma_int_t ldda, magma_int_t *info);
int CUDA_zlarfb(MORSE_enum side, MORSE_enum trans, MORSE_enum direct, MORSE_enum storev, int M, int N, int K, const cuDoubleComplex *V, int LDV, const cuDoubleComplex *T, int LDT, cuDoubleComplex *C, int LDC, cuDoubleComplex *WORK, int LDWORK, CUBLAS_STREAM_PARAM );
int CUDA_zlauum( char uplo, magma_int_t n, cuDoubleComplex *dA, magma_int_t ldda, magma_int_t *info); int CUDA_zlauum( char uplo, magma_int_t n, cuDoubleComplex *dA, magma_int_t ldda, magma_int_t *info);
int CUDA_zpotrf( magma_uplo_t uplo, magma_int_t n, magmaDoubleComplex *dA, magma_int_t ldda, magma_int_t *info); int CUDA_zpotrf( magma_uplo_t uplo, magma_int_t n, magmaDoubleComplex *dA, magma_int_t ldda, magma_int_t *info);
int CUDA_zssssm( magma_storev_t storev, magma_int_t m1, magma_int_t n1, magma_int_t m2, magma_int_t n2, magma_int_t k, magma_int_t ib, magmaDoubleComplex *dA1, magma_int_t ldda1, magmaDoubleComplex *dA2, magma_int_t ldda2, magmaDoubleComplex *dL1, magma_int_t lddl1, magmaDoubleComplex *dL2, magma_int_t lddl2, magma_int_t *IPIV, magma_int_t *info); int CUDA_zssssm( magma_storev_t storev, magma_int_t m1, magma_int_t n1, magma_int_t m2, magma_int_t n2, magma_int_t k, magma_int_t ib, magmaDoubleComplex *dA1, magma_int_t ldda1, magmaDoubleComplex *dA2, magma_int_t ldda2, magmaDoubleComplex *dL1, magma_int_t lddl1, magmaDoubleComplex *dL2, magma_int_t lddl2, magma_int_t *IPIV, magma_int_t *info);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment