Commit 31e42080 authored by Mathieu Faverge

Make unmqr/unmlq call the larfb

parent afa6c8f7
......@@ -254,10 +254,10 @@ int CORE_ztsmqr(MORSE_enum side, MORSE_enum trans,
CORE_zparfb(
side, trans, MorseForward, MorseColumnwise,
mi, ni, M2, N2, kb, 0,
&A1[LDA1*jc+ic], LDA1,
A1 + LDA1*jc+ic, LDA1,
A2, LDA2,
&V[LDV*i], LDV,
&T[LDT*i], LDT,
V + LDV*i, LDV,
T + LDT*i, LDT,
WORK, LDWORK);
}
return MORSE_SUCCESS;
......
......@@ -32,6 +32,7 @@ set(ZSRC
cuda_zhemm.c
cuda_zher2k.c
cuda_zherk.c
cuda_zlarfb.c
cuda_zparfb.c
cuda_zsymm.c
cuda_zsyr2k.c
......@@ -40,6 +41,8 @@ set(ZSRC
cuda_ztrsm.c
cuda_ztsmlq.c
cuda_ztsmqr.c
cuda_zunmlqt.c
cuda_zunmqrt.c
)
if( CHAMELEON_USE_MAGMA )
......@@ -50,15 +53,12 @@ if( CHAMELEON_USE_MAGMA )
cuda_zgessm.c
cuda_zgetrf.c
cuda_zlauum.c
cuda_zparfb.c
cuda_zpotrf.c
cuda_zssssm.c
cuda_ztrtri.c
cuda_ztslqt.c
cuda_ztsqrt.c
cuda_ztstrf.c
cuda_zunmlqt.c
cuda_zunmqrt.c
)
endif()
......
......@@ -132,7 +132,6 @@ int CUDA_ztsmqr(
ni = N1 - i;
jc = i;
}
/*
* Apply H or H' (NOTE: CORE_zparfb used to be CORE_ztsrfb)
*/
......
......@@ -24,14 +24,14 @@
**/
#include "cudablas/include/cudablas.h"
#if defined(CHAMELEON_USE_MAGMA)
int CUDA_zunmlqt(
magma_side_t side, magma_trans_t trans,
magma_int_t M, magma_int_t N, magma_int_t K, magma_int_t IB,
const magmaDoubleComplex *A, magma_int_t LDA,
const magmaDoubleComplex *T, magma_int_t LDT,
magmaDoubleComplex *C, magma_int_t LDC,
magmaDoubleComplex *WORK, magma_int_t LDWORK )
int
CUDA_zunmlqt(MORSE_enum side, MORSE_enum trans,
int M, int N, int K, int IB,
const cuDoubleComplex *A, int LDA,
const cuDoubleComplex *T, int LDT,
cuDoubleComplex *C, int LDC,
cuDoubleComplex *WORK, int LDWORK,
CUBLAS_STREAM_PARAM )
{
int i, kb;
int i1, i3;
......@@ -42,13 +42,13 @@ int CUDA_zunmlqt(
int mi = M;
/* Check input arguments */
if ((side != MagmaLeft) && (side != MagmaRight)) {
if ((side != MorseLeft) && (side != MorseRight)) {
return -1;
}
/*
* NQ is the order of Q and NW is the minimum dimension of WORK
*/
if (side == MagmaLeft) {
if (side == MorseLeft) {
nq = M;
nw = N;
}
......@@ -57,7 +57,7 @@ int CUDA_zunmlqt(
nw = M;
}
if ((trans != MagmaNoTrans) && (trans != MagmaConjTrans)) {
if ((trans != MorseNoTrans) && (trans != MorseConjTrans)) {
return -2;
}
if (M < 0) {
......@@ -84,10 +84,10 @@ int CUDA_zunmlqt(
/* Quick return */
if ((M == 0) || (N == 0) || (K == 0))
return MAGMA_SUCCESS;
return MORSE_SUCCESS;
if (((side == MagmaLeft) && (trans == MagmaNoTrans))
|| ((side == MagmaRight) && (trans != MagmaNoTrans))) {
if (((side == MorseLeft) && (trans == MorseNoTrans))
|| ((side == MorseRight) && (trans != MorseNoTrans))) {
i1 = 0;
i3 = IB;
}
......@@ -106,7 +106,7 @@ int CUDA_zunmlqt(
for(i = i1; (i >- 1) && (i < K); i+=i3 ) {
kb = min(IB, K-i);
if (side == MagmaLeft) {
if (side == MorseLeft) {
/*
* H or H' is applied to C(i:m,1:n)
*/
......@@ -121,13 +121,13 @@ int CUDA_zunmlqt(
jc = i;
}
magma_zlarfb_gpu( side, trans, MagmaForward, MagmaRowwise,
mi, ni, kb,
A + LDA * i + i, LDA,
T + LDT * i, LDT,
C + LDC * jc + ic, LDC,
WORK, LDWORK);
CUDA_zlarfb( side, trans, MorseForward, MorseRowwise,
mi, ni, kb,
A + LDA * i + i, LDA,
T + LDT * i, LDT,
C + LDC * jc + ic, LDC,
WORK, LDWORK, CUBLAS_STREAM_VALUE);
}
return MORSE_SUCCESS;
}
#endif
......@@ -24,14 +24,14 @@
**/
#include "cudablas/include/cudablas.h"
#if defined(CHAMELEON_USE_MAGMA)
int CUDA_zunmqrt(
magma_side_t side, magma_trans_t trans,
magma_int_t M, magma_int_t N, magma_int_t K, magma_int_t IB,
const magmaDoubleComplex *A, magma_int_t LDA,
const magmaDoubleComplex *T, magma_int_t LDT,
magmaDoubleComplex *C, magma_int_t LDC,
magmaDoubleComplex *WORK, magma_int_t LDWORK )
int
CUDA_zunmqrt(MORSE_enum side, MORSE_enum trans,
int M, int N, int K, int IB,
const cuDoubleComplex *A, int LDA,
const cuDoubleComplex *T, int LDT,
cuDoubleComplex *C, int LDC,
cuDoubleComplex *WORK, int LDWORK,
CUBLAS_STREAM_PARAM )
{
int i, kb;
int i1, i3;
......@@ -42,13 +42,13 @@ int CUDA_zunmqrt(
int mi = M;
/* Check input arguments */
if ((side != MagmaLeft) && (side != MagmaRight)) {
if ((side != MorseLeft) && (side != MorseRight)) {
return -1;
}
/*
* NQ is the order of Q and NW is the minimum dimension of WORK
*/
if (side == MagmaLeft) {
if (side == MorseLeft) {
nq = M;
nw = N;
}
......@@ -57,7 +57,7 @@ int CUDA_zunmqrt(
nw = M;
}
if ((trans != MagmaNoTrans) && (trans != MagmaConjTrans)) {
if ((trans != MorseNoTrans) && (trans != MorseConjTrans)) {
return -2;
}
if (M < 0) {
......@@ -84,10 +84,10 @@ int CUDA_zunmqrt(
/* Quick return */
if ((M == 0) || (N == 0) || (K == 0))
return MAGMA_SUCCESS;
return MORSE_SUCCESS;
if (((side == MagmaLeft) && (trans != MagmaNoTrans))
|| ((side == MagmaRight) && (trans == MagmaNoTrans))) {
if (((side == MorseLeft) && (trans != MorseNoTrans))
|| ((side == MorseRight) && (trans == MorseNoTrans))) {
i1 = 0;
i3 = IB;
}
......@@ -99,7 +99,7 @@ int CUDA_zunmqrt(
for(i = i1; (i >- 1) && (i < K); i+=i3 ) {
kb = min(IB, K-i);
if (side == MagmaLeft) {
if (side == MorseLeft) {
/*
* H or H' is applied to C(i:m,1:n)
*/
......@@ -114,14 +114,14 @@ int CUDA_zunmqrt(
jc = i;
}
magma_zlarfb_gpu( side, trans, MagmaForward, MagmaColumnwise,
mi, ni, kb,
A + LDA * i + i, LDA,
T + LDT * i, LDT,
C + LDC * jc + ic, LDC,
WORK, LDWORK);
CUDA_zlarfb( side, trans, MorseForward, MorseColumnwise,
mi, ni, kb,
A + LDA * i + i, LDA,
T + LDT * i, LDT,
C + LDC * jc + ic, LDC,
WORK, LDWORK,
CUBLAS_STREAM_VALUE );
}
return MORSE_SUCCESS;
}
#endif
......@@ -54,6 +54,7 @@ int CUDA_zgeqrt( magma_int_t m, magma_int_t n, magma_int_t nb, magmaDoubleComple
int CUDA_zgessm( char storev, magma_int_t m, magma_int_t n, magma_int_t k, magma_int_t ib, magma_int_t *ipiv, cuDoubleComplex *dL1, magma_int_t lddl1, cuDoubleComplex *dL, magma_int_t lddl, cuDoubleComplex *dA, magma_int_t ldda, magma_int_t *info);
int CUDA_zgetrf_incpiv( char storev, magma_int_t m, magma_int_t n, magma_int_t ib, cuDoubleComplex *hA, magma_int_t ldha, cuDoubleComplex *dA, magma_int_t ldda, cuDoubleComplex *hL, magma_int_t ldhl, cuDoubleComplex *dL, magma_int_t lddl, magma_int_t *ipiv, cuDoubleComplex *dwork, magma_int_t lddwork, magma_int_t *info);
int CUDA_zgetrf_nopiv( magma_int_t m, magma_int_t n, cuDoubleComplex *dA, magma_int_t ldda, magma_int_t *info);
int CUDA_zlarfb(MORSE_enum side, MORSE_enum trans, MORSE_enum direct, MORSE_enum storev, int M, int N, int K, const cuDoubleComplex *V, int LDV, const cuDoubleComplex *T, int LDT, cuDoubleComplex *C, int LDC, cuDoubleComplex *WORK, int LDWORK, CUBLAS_STREAM_PARAM );
int CUDA_zlauum( char uplo, magma_int_t n, cuDoubleComplex *dA, magma_int_t ldda, magma_int_t *info);
int CUDA_zpotrf( magma_uplo_t uplo, magma_int_t n, magmaDoubleComplex *dA, magma_int_t ldda, magma_int_t *info);
int CUDA_zssssm( magma_storev_t storev, magma_int_t m1, magma_int_t n1, magma_int_t m2, magma_int_t n2, magma_int_t k, magma_int_t ib, magmaDoubleComplex *dA1, magma_int_t ldda1, magmaDoubleComplex *dA2, magma_int_t ldda2, magmaDoubleComplex *dL1, magma_int_t lddl1, magmaDoubleComplex *dL2, magma_int_t lddl2, magma_int_t *IPIV, magma_int_t *info);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment