diff --git a/coreblas/compute/core_ztsmqr.c b/coreblas/compute/core_ztsmqr.c index 0792655eefe1ba2dc8ca986d7a36d17bed8d16b3..10d4f2d03a98a135c8062c0618996572d8489829 100644 --- a/coreblas/compute/core_ztsmqr.c +++ b/coreblas/compute/core_ztsmqr.c @@ -254,10 +254,10 @@ int CORE_ztsmqr(MORSE_enum side, MORSE_enum trans, CORE_zparfb( side, trans, MorseForward, MorseColumnwise, mi, ni, M2, N2, kb, 0, - &A1[LDA1*jc+ic], LDA1, + A1 + LDA1*jc+ic, LDA1, A2, LDA2, - &V[LDV*i], LDV, - &T[LDT*i], LDT, + V + LDV*i, LDV, + T + LDT*i, LDT, WORK, LDWORK); } return MORSE_SUCCESS; diff --git a/cudablas/compute/CMakeLists.txt b/cudablas/compute/CMakeLists.txt index 7ef91aee08ef11362bc8115316fb7a69a4bd56d8..d05705eb7fa4a50aeed8bb7e00b707e89e7bf5ae 100644 --- a/cudablas/compute/CMakeLists.txt +++ b/cudablas/compute/CMakeLists.txt @@ -32,6 +32,7 @@ set(ZSRC cuda_zhemm.c cuda_zher2k.c cuda_zherk.c + cuda_zlarfb.c cuda_zparfb.c cuda_zsymm.c cuda_zsyr2k.c @@ -40,6 +41,8 @@ set(ZSRC cuda_ztrsm.c cuda_ztsmlq.c cuda_ztsmqr.c + cuda_zunmlqt.c + cuda_zunmqrt.c ) if( CHAMELEON_USE_MAGMA ) @@ -50,15 +53,12 @@ if( CHAMELEON_USE_MAGMA ) cuda_zgessm.c cuda_zgetrf.c cuda_zlauum.c - cuda_zparfb.c cuda_zpotrf.c cuda_zssssm.c cuda_ztrtri.c cuda_ztslqt.c cuda_ztsqrt.c cuda_ztstrf.c - cuda_zunmlqt.c - cuda_zunmqrt.c ) endif() diff --git a/cudablas/compute/cuda_ztsmqr.c b/cudablas/compute/cuda_ztsmqr.c index a234d2f2178f2edea85f413e08db80bb0ec24c1a..e104a085fcccd05a3f4b6746ec98f6713f97025d 100644 --- a/cudablas/compute/cuda_ztsmqr.c +++ b/cudablas/compute/cuda_ztsmqr.c @@ -132,7 +132,6 @@ int CUDA_ztsmqr( ni = N1 - i; jc = i; } - /* * Apply H or H' (NOTE: CORE_zparfb used to be CORE_ztsrfb) */ diff --git a/cudablas/compute/cuda_zunmlqt.c b/cudablas/compute/cuda_zunmlqt.c index 55ae68ed671dc94bf7b156146c19e04b090a9529..c9a15473376f72c6d9a0dcee40b2da06e29adf5e 100644 --- a/cudablas/compute/cuda_zunmlqt.c +++ b/cudablas/compute/cuda_zunmlqt.c @@ -24,14 +24,14 @@ **/ #include "cudablas/include/cudablas.h" -#if defined(CHAMELEON_USE_MAGMA) -int CUDA_zunmlqt( - magma_side_t side, magma_trans_t trans, - magma_int_t M, magma_int_t N, magma_int_t K, magma_int_t IB, - const magmaDoubleComplex *A, magma_int_t LDA, - const magmaDoubleComplex *T, magma_int_t LDT, - magmaDoubleComplex *C, magma_int_t LDC, - magmaDoubleComplex *WORK, magma_int_t LDWORK ) +int +CUDA_zunmlqt(MORSE_enum side, MORSE_enum trans, + int M, int N, int K, int IB, + const cuDoubleComplex *A, int LDA, + const cuDoubleComplex *T, int LDT, + cuDoubleComplex *C, int LDC, + cuDoubleComplex *WORK, int LDWORK, + CUBLAS_STREAM_PARAM ) { int i, kb; int i1, i3; @@ -42,13 +42,13 @@ int CUDA_zunmlqt( int mi = M; /* Check input arguments */ - if ((side != MagmaLeft) && (side != MagmaRight)) { + if ((side != MorseLeft) && (side != MorseRight)) { return -1; } /* * NQ is the order of Q and NW is the minimum dimension of WORK */ - if (side == MagmaLeft) { + if (side == MorseLeft) { nq = M; nw = N; } @@ -57,7 +57,7 @@ int CUDA_zunmlqt( nw = M; } - if ((trans != MagmaNoTrans) && (trans != MagmaConjTrans)) { + if ((trans != MorseNoTrans) && (trans != MorseConjTrans)) { return -2; } if (M < 0) { @@ -84,10 +84,10 @@ int CUDA_zunmlqt( /* Quick return */ if ((M == 0) || (N == 0) || (K == 0)) - return MAGMA_SUCCESS; + return MORSE_SUCCESS; - if (((side == MagmaLeft) && (trans == MagmaNoTrans)) - || ((side == MagmaRight) && (trans != MagmaNoTrans))) { + if (((side == MorseLeft) && (trans == MorseNoTrans)) + || ((side == MorseRight) && (trans != MorseNoTrans))) { i1 = 0; i3 = IB; } @@ -106,7 +106,7 @@ int CUDA_zunmlqt( for(i = i1; (i >- 1) && (i < K); i+=i3 ) { kb = min(IB, K-i); - if (side == MagmaLeft) { + if (side == MorseLeft) { /* * H or H' is applied to C(i:m,1:n) */ @@ -121,13 +121,13 @@ int CUDA_zunmlqt( jc = i; } - magma_zlarfb_gpu( side, trans, MagmaForward, MagmaRowwise, - mi, ni, kb, - A + LDA * i + i, LDA, - T + LDT * i, LDT, - C + LDC * jc + ic, LDC, - WORK, LDWORK); + CUDA_zlarfb( side, trans, MorseForward, MorseRowwise, + mi, ni, kb, + A + LDA * i + i, LDA, + T + LDT * i, LDT, + C + LDC * jc + ic, LDC, + WORK, LDWORK, CUBLAS_STREAM_VALUE); } return MORSE_SUCCESS; } -#endif + diff --git a/cudablas/compute/cuda_zunmqrt.c b/cudablas/compute/cuda_zunmqrt.c index e868b1e9277f2a333cd2a33ef33f3b6c4a65cd1b..6032cabc4bb04e58a637370078abc19509374433 100644 --- a/cudablas/compute/cuda_zunmqrt.c +++ b/cudablas/compute/cuda_zunmqrt.c @@ -24,14 +24,14 @@ **/ #include "cudablas/include/cudablas.h" -#if defined(CHAMELEON_USE_MAGMA) -int CUDA_zunmqrt( - magma_side_t side, magma_trans_t trans, - magma_int_t M, magma_int_t N, magma_int_t K, magma_int_t IB, - const magmaDoubleComplex *A, magma_int_t LDA, - const magmaDoubleComplex *T, magma_int_t LDT, - magmaDoubleComplex *C, magma_int_t LDC, - magmaDoubleComplex *WORK, magma_int_t LDWORK ) +int +CUDA_zunmqrt(MORSE_enum side, MORSE_enum trans, + int M, int N, int K, int IB, + const cuDoubleComplex *A, int LDA, + const cuDoubleComplex *T, int LDT, + cuDoubleComplex *C, int LDC, + cuDoubleComplex *WORK, int LDWORK, + CUBLAS_STREAM_PARAM ) { int i, kb; int i1, i3; @@ -42,13 +42,13 @@ int CUDA_zunmqrt( int mi = M; /* Check input arguments */ - if ((side != MagmaLeft) && (side != MagmaRight)) { + if ((side != MorseLeft) && (side != MorseRight)) { return -1; } /* * NQ is the order of Q and NW is the minimum dimension of WORK */ - if (side == MagmaLeft) { + if (side == MorseLeft) { nq = M; nw = N; } @@ -57,7 +57,7 @@ int CUDA_zunmqrt( nw = M; } - if ((trans != MagmaNoTrans) && (trans != MagmaConjTrans)) { + if ((trans != MorseNoTrans) && (trans != MorseConjTrans)) { return -2; } if (M < 0) { @@ -84,10 +84,10 @@ int CUDA_zunmqrt( /* Quick return */ if ((M == 0) || (N == 0) || (K == 0)) - return MAGMA_SUCCESS; + return MORSE_SUCCESS; - if (((side == MagmaLeft) && (trans != MagmaNoTrans)) - || ((side == MagmaRight) && (trans == MagmaNoTrans))) { + if (((side == MorseLeft) && (trans != MorseNoTrans)) + || ((side == MorseRight) && (trans == MorseNoTrans))) { i1 = 0; i3 = IB; } @@ -99,7 +99,7 @@ int CUDA_zunmqrt( for(i = i1; (i >- 1) && (i < K); i+=i3 ) { kb = min(IB, K-i); - if (side == MagmaLeft) { + if (side == MorseLeft) { /* * H or H' is applied to C(i:m,1:n) */ @@ -114,14 +114,14 @@ int CUDA_zunmqrt( jc = i; } - magma_zlarfb_gpu( side, trans, MagmaForward, MagmaColumnwise, - mi, ni, kb, - A + LDA * i + i, LDA, - T + LDT * i, LDT, - C + LDC * jc + ic, LDC, - WORK, LDWORK); + CUDA_zlarfb( side, trans, MorseForward, MorseColumnwise, + mi, ni, kb, + A + LDA * i + i, LDA, + T + LDT * i, LDT, + C + LDC * jc + ic, LDC, + WORK, LDWORK, + CUBLAS_STREAM_VALUE ); } return MORSE_SUCCESS; } -#endif diff --git a/cudablas/include/cudablas_z.h b/cudablas/include/cudablas_z.h index 064583858fc3ce83c6a51a8285b65e49913f6e30..6f622b44e8c7270f6d97f85a905cdd77bc18245c 100644 --- a/cudablas/include/cudablas_z.h +++ b/cudablas/include/cudablas_z.h @@ -54,6 +54,7 @@ int CUDA_zgeqrt( magma_int_t m, magma_int_t n, magma_int_t nb, magmaDoubleComple int CUDA_zgessm( char storev, magma_int_t m, magma_int_t n, magma_int_t k, magma_int_t ib, magma_int_t *ipiv, cuDoubleComplex *dL1, magma_int_t lddl1, cuDoubleComplex *dL, magma_int_t lddl, cuDoubleComplex *dA, magma_int_t ldda, magma_int_t *info); int CUDA_zgetrf_incpiv( char storev, magma_int_t m, magma_int_t n, magma_int_t ib, cuDoubleComplex *hA, magma_int_t ldha, cuDoubleComplex *dA, magma_int_t ldda, cuDoubleComplex *hL, magma_int_t ldhl, cuDoubleComplex *dL, magma_int_t lddl, magma_int_t *ipiv, cuDoubleComplex *dwork, magma_int_t lddwork, magma_int_t *info); int CUDA_zgetrf_nopiv( magma_int_t m, magma_int_t n, cuDoubleComplex *dA, magma_int_t ldda, magma_int_t *info); +int CUDA_zlarfb(MORSE_enum side, MORSE_enum trans, MORSE_enum direct, MORSE_enum storev, int M, int N, int K, const cuDoubleComplex *V, int LDV, const cuDoubleComplex *T, int LDT, cuDoubleComplex *C, int LDC, cuDoubleComplex *WORK, int LDWORK, CUBLAS_STREAM_PARAM ); int CUDA_zlauum( char uplo, magma_int_t n, cuDoubleComplex *dA, magma_int_t ldda, magma_int_t *info); int CUDA_zpotrf( magma_uplo_t uplo, magma_int_t n, magmaDoubleComplex *dA, magma_int_t ldda, magma_int_t *info); int CUDA_zssssm( magma_storev_t storev, magma_int_t m1, magma_int_t n1, magma_int_t m2, magma_int_t n2, magma_int_t k, magma_int_t ib, magmaDoubleComplex *dA1, magma_int_t ldda1, magmaDoubleComplex *dA2, magma_int_t ldda2, magmaDoubleComplex *dL1, magma_int_t lddl1, magmaDoubleComplex *dL2, magma_int_t lddl2, magma_int_t *IPIV, magma_int_t *info);