diff --git a/cudablas/compute/cuda_zgeadd.c b/cudablas/compute/cuda_zgeadd.c index b770932073c64d232c237ce586df0a508b62b91b..78ebaf077bb1004dda107384133da93044f2f263 100644 --- a/cudablas/compute/cuda_zgeadd.c +++ b/cudablas/compute/cuda_zgeadd.c @@ -72,22 +72,24 @@ * @retval <0 if -i, the i-th argument had an illegal value * */ -int CUDA_zgeadd(cham_trans_t trans, - int m, int n, - const cuDoubleComplex *alpha, - const cuDoubleComplex *A, int lda, - const cuDoubleComplex *beta, - cuDoubleComplex *B, int ldb, - CUBLAS_STREAM_PARAM) +int CUDA_zgeadd( cham_trans_t trans, + int m, int n, + const cuDoubleComplex *alpha, + const cuDoubleComplex *A, int lda, + const cuDoubleComplex *beta, + cuDoubleComplex *B, int ldb, + cublasHandle_t handle ) { - cublasZgeam(CUBLAS_HANDLE - chameleon_cublas_const(trans), chameleon_cublas_const(ChamNoTrans), - m, n, - CUBLAS_VALUE(alpha), A, lda, - CUBLAS_VALUE(beta), B, ldb, - B, ldb); + cublasStatus_t rc; - assert( CUBLAS_STATUS_SUCCESS == cublasGetError() ); + rc = cublasZgeam( handle, + chameleon_cublas_const(trans), chameleon_cublas_const(ChamNoTrans), + m, n, + CUBLAS_VALUE(alpha), A, lda, + CUBLAS_VALUE(beta), B, ldb, + B, ldb ); + assert( rc == CUBLAS_STATUS_SUCCESS ); + (void)rc; return CHAMELEON_SUCCESS; } diff --git a/cudablas/compute/cuda_zgemerge.c b/cudablas/compute/cuda_zgemerge.c index 9f3f8d134b1695e69ce1537c429ef013cbd945de..de144f7904a9340683e0da4a7e2fb59d9d474319 100644 --- a/cudablas/compute/cuda_zgemerge.c +++ b/cudablas/compute/cuda_zgemerge.c @@ -25,11 +25,12 @@ CUDA_zgemerge( cham_side_t side, cham_diag_t diag, int M, int N, const cuDoubleComplex *A, int LDA, cuDoubleComplex *B, int LDB, - CUBLAS_STREAM_PARAM) + cublasHandle_t handle ) { - int i; const cuDoubleComplex *cola; cuDoubleComplex *colb; + cublasStatus_t rc; + int i; if (M < 0) { return -1; @@ -44,26 +45,23 @@ CUDA_zgemerge( cham_side_t side, cham_diag_t diag, return -7; } - CUBLAS_GET_STREAM; - if (side == ChamLeft){ for(i=0; i<N; i++){ cola = A + i*LDA; colb = B + i*LDB; - cudaMemcpyAsync(colb , cola, - (i+1)*sizeof(cuDoubleComplex), - cudaMemcpyDeviceToDevice, stream); + rc = cublasZcopy( handle, i+1, cola, 1, colb, 1 ); + assert( rc == CUBLAS_STATUS_SUCCESS ); } }else{ for(i=0; i<N; i++){ cola = A + i*LDA; colb = B + i*LDB; - cudaMemcpyAsync(colb+i , cola+i, - (M-i)*sizeof(cuDoubleComplex), - cudaMemcpyDeviceToDevice, stream); + rc = cublasZcopy( handle, M-i, cola, 1, colb, 1 ); + assert( rc == CUBLAS_STATUS_SUCCESS ); } } (void)diag; + (void)rc; return CHAMELEON_SUCCESS; } diff --git a/cudablas/compute/cuda_zgemm.c b/cudablas/compute/cuda_zgemm.c index 7a5900b761ad081181e6b47977e540aa16797b1b..7a9da427a944164fe1b3537bd5191e7d2be88c51 100644 --- a/cudablas/compute/cuda_zgemm.c +++ b/cudablas/compute/cuda_zgemm.c @@ -20,22 +20,26 @@ */ #include "cudablas.h" -int CUDA_zgemm(cham_trans_t transa, cham_trans_t transb, - int m, int n, int k, - const cuDoubleComplex *alpha, - const cuDoubleComplex *A, int lda, - const cuDoubleComplex *B, int ldb, - const cuDoubleComplex *beta, - cuDoubleComplex *C, int ldc, - CUBLAS_STREAM_PARAM) +int +CUDA_zgemm( cham_trans_t transa, cham_trans_t transb, + int m, int n, int k, + const cuDoubleComplex *alpha, + const cuDoubleComplex *A, int lda, + const cuDoubleComplex *B, int ldb, + const cuDoubleComplex *beta, + cuDoubleComplex *C, int ldc, + cublasHandle_t handle ) { - cublasZgemm(CUBLAS_HANDLE - chameleon_cublas_const(transa), chameleon_cublas_const(transb), - m, n, k, - CUBLAS_VALUE(alpha), A, lda, - B, ldb, - CUBLAS_VALUE(beta), C, ldc); + cublasStatus_t rc; - assert( CUBLAS_STATUS_SUCCESS == cublasGetError() ); + rc = cublasZgemm( handle, + chameleon_cublas_const(transa), chameleon_cublas_const(transb), + m, n, k, + CUBLAS_VALUE(alpha), A, lda, + B, ldb, + CUBLAS_VALUE(beta), C, ldc); + + assert( rc == CUBLAS_STATUS_SUCCESS ); + (void)rc; return CHAMELEON_SUCCESS; } diff --git a/cudablas/compute/cuda_zgeqrt.c b/cudablas/compute/cuda_zgeqrt.c index 32abd8f90d3a87e7efacfa11264c3c096e18df30..d2d33026f7f290ab9d92b69de406d4a98d13c413 100644 --- a/cudablas/compute/cuda_zgeqrt.c +++ b/cudablas/compute/cuda_zgeqrt.c @@ -21,7 +21,8 @@ #include "cudablas.h" #if defined(CHAMELEON_USE_MAGMA) -int CUDA_zgeqrt( +int +CUDA_zgeqrt( magma_int_t m, magma_int_t n, magma_int_t nb, magmaDoubleComplex *da, magma_int_t ldda, magmaDoubleComplex *v, magma_int_t ldv, @@ -32,7 +33,7 @@ int CUDA_zgeqrt( magmaDoubleComplex *tau, magmaDoubleComplex *hwork, magmaDoubleComplex *dwork, - CUstream stream) + CUstream stream ) { #define da_ref(a_1,a_2) ( da+(a_2)*(ldda) + (a_1)) #define v_ref(a_1,a_2) ( v+(a_2)*(ldv) + (a_1)) diff --git a/cudablas/compute/cuda_zhemm.c b/cudablas/compute/cuda_zhemm.c index 26a470b3414aed3777a23e526ccd8e30febe75b4..95201c0357e6d3cdc0efe6ec05e06d892f581764 100644 --- a/cudablas/compute/cuda_zhemm.c +++ b/cudablas/compute/cuda_zhemm.c @@ -20,22 +20,26 @@ */ #include "cudablas.h" -int CUDA_zhemm(cham_side_t side, cham_uplo_t uplo, - int m, int n, - const cuDoubleComplex *alpha, - const cuDoubleComplex *A, int lda, - const cuDoubleComplex *B, int ldb, - const cuDoubleComplex *beta, - cuDoubleComplex *C, int ldc, - CUBLAS_STREAM_PARAM) +int +CUDA_zhemm( cham_side_t side, cham_uplo_t uplo, + int m, int n, + const cuDoubleComplex *alpha, + const cuDoubleComplex *A, int lda, + const cuDoubleComplex *B, int ldb, + const cuDoubleComplex *beta, + cuDoubleComplex *C, int ldc, + cublasHandle_t handle ) { - cublasZhemm(CUBLAS_HANDLE - chameleon_cublas_const(side), chameleon_cublas_const(uplo), - m, n, - CUBLAS_VALUE(alpha), A, lda, - B, ldb, - CUBLAS_VALUE(beta), C, ldc); + cublasStatus_t rc; - assert( CUBLAS_STATUS_SUCCESS == cublasGetError() ); + rc = cublasZhemm( handle, + chameleon_cublas_const(side), chameleon_cublas_const(uplo), + m, n, + CUBLAS_VALUE(alpha), A, lda, + B, ldb, + CUBLAS_VALUE(beta), C, ldc ); + + assert( rc == CUBLAS_STATUS_SUCCESS ); + (void)rc; return CHAMELEON_SUCCESS; } diff --git a/cudablas/compute/cuda_zher2k.c b/cudablas/compute/cuda_zher2k.c index 0fd08570d9722bbdcc9a0f419008f343355077a7..bc4e69c839ddcc64354464492dcd76265471762d 100644 --- a/cudablas/compute/cuda_zher2k.c +++ b/cudablas/compute/cuda_zher2k.c @@ -20,23 +20,26 @@ */ #include "cudablas.h" -int CUDA_zher2k(cham_uplo_t uplo, cham_trans_t trans, - int n, int k, - const cuDoubleComplex *alpha, - const cuDoubleComplex *A, int lda, - const cuDoubleComplex *B, int ldb, - const double *beta, - cuDoubleComplex *C, int ldc, - CUBLAS_STREAM_PARAM) +int +CUDA_zher2k( cham_uplo_t uplo, cham_trans_t trans, + int n, int k, + const cuDoubleComplex *alpha, + const cuDoubleComplex *A, int lda, + const cuDoubleComplex *B, int ldb, + const double *beta, + cuDoubleComplex *C, int ldc, + cublasHandle_t handle ) { - cublasZher2k(CUBLAS_HANDLE - chameleon_cublas_const(uplo), chameleon_cublas_const(trans), - n, k, - CUBLAS_VALUE(alpha), A, lda, - B, ldb, - CUBLAS_VALUE(beta), C, ldc); + cublasStatus_t rc; - assert( CUBLAS_STATUS_SUCCESS == cublasGetError() ); + rc = cublasZher2k( handle, + chameleon_cublas_const(uplo), chameleon_cublas_const(trans), + n, k, + CUBLAS_VALUE(alpha), A, lda, + B, ldb, + CUBLAS_VALUE(beta), C, ldc ); + assert( rc == CUBLAS_STATUS_SUCCESS ); + (void)rc; return CHAMELEON_SUCCESS; } diff --git a/cudablas/compute/cuda_zherfb.c b/cudablas/compute/cuda_zherfb.c index 13a0d9c24046e459f3acaebe0416604297f368d2..36e1784fb72f355068d549bc5229f8157a3af1ea 100644 --- a/cudablas/compute/cuda_zherfb.c +++ b/cudablas/compute/cuda_zherfb.c @@ -27,7 +27,7 @@ CUDA_zherfb( cham_uplo_t uplo, int n, const cuDoubleComplex *T, int ldt, cuDoubleComplex *C, int ldc, cuDoubleComplex *WORK, int ldwork, - CUBLAS_STREAM_PARAM ) + cublasHandle_t handle ) { /* Check input arguments */ if ((uplo != ChamUpper) && (uplo != ChamLower)) { @@ -67,21 +67,21 @@ CUDA_zherfb( cham_uplo_t uplo, int n, /* Left */ CUDA_zunmqrt( ChamLeft, ChamConjTrans, n, n, k, ib, A, lda, T, ldt, C, ldc, WORK, ldwork, - CUBLAS_STREAM_VALUE ); + handle ); /* Right */ CUDA_zunmqrt( ChamRight, ChamNoTrans, n, n, k, ib, A, lda, T, ldt, C, ldc, WORK, ldwork, - CUBLAS_STREAM_VALUE ); + handle ); } else { /* Right */ CUDA_zunmlqt( ChamRight, ChamConjTrans, n, n, k, ib, A, lda, T, ldt, C, ldc, WORK, ldwork, - CUBLAS_STREAM_VALUE ); + handle ); /* Left */ CUDA_zunmlqt( ChamLeft, ChamNoTrans, n, n, k, ib, A, lda, T, ldt, C, ldc, WORK, ldwork, - CUBLAS_STREAM_VALUE ); + handle ); } return 0; } diff --git a/cudablas/compute/cuda_zherk.c b/cudablas/compute/cuda_zherk.c index cd0d7ff6441932487e13e5ac4fc6ac9f59d3a12a..3ac3b66f12430e91fe50f0c1bef7e50888eae2c6 100644 --- a/cudablas/compute/cuda_zherk.c +++ b/cudablas/compute/cuda_zherk.c @@ -20,21 +20,24 @@ */ #include "cudablas.h" -int CUDA_zherk( cham_uplo_t uplo, cham_trans_t trans, - int n, int k, - const double *alpha, - const cuDoubleComplex *A, int lda, - const double *beta, - cuDoubleComplex *B, int ldb, - CUBLAS_STREAM_PARAM) +int +CUDA_zherk( cham_uplo_t uplo, cham_trans_t trans, + int n, int k, + const double *alpha, + const cuDoubleComplex *A, int lda, + const double *beta, + cuDoubleComplex *B, int ldb, + cublasHandle_t handle ) { - cublasZherk( CUBLAS_HANDLE - chameleon_cublas_const(uplo), chameleon_cublas_const(trans), - n, k, - CUBLAS_VALUE(alpha), A, lda, - CUBLAS_VALUE(beta), B, ldb); + cublasStatus_t rc; - assert( CUBLAS_STATUS_SUCCESS == cublasGetError() ); + rc = cublasZherk( handle, + chameleon_cublas_const(uplo), chameleon_cublas_const(trans), + n, k, + CUBLAS_VALUE(alpha), A, lda, + CUBLAS_VALUE(beta), B, ldb ); + assert( rc == CUBLAS_STATUS_SUCCESS ); + (void)rc; return CHAMELEON_SUCCESS; } diff --git a/cudablas/compute/cuda_zlarfb.c b/cudablas/compute/cuda_zlarfb.c index 3cc573ff59c813e7f9448ac847684e84de4a3f45..49a9baaa3622fbfbeed8f8ff173cc8d118c6c945 100644 --- a/cudablas/compute/cuda_zlarfb.c +++ b/cudablas/compute/cuda_zlarfb.c @@ -30,7 +30,7 @@ CUDA_zlarfb( cham_side_t side, cham_trans_t trans, const cuDoubleComplex *T, int LDT, cuDoubleComplex *C, int LDC, cuDoubleComplex *WORK, int LDWORK, - CUBLAS_STREAM_PARAM ) + cublasHandle_t handle ) { #if defined(PRECISION_z) || defined(PRECISION_c) cuDoubleComplex zzero = make_cuDoubleComplex(0.0, 0.0); @@ -42,8 +42,9 @@ CUDA_zlarfb( cham_side_t side, cham_trans_t trans, double mzone = -1.0; #endif /* defined(PRECISION_z) || defined(PRECISION_c) */ - cham_trans_t transT, notransV, transV; - cham_uplo_t uplo; + cublasStatus_t rc; + cham_trans_t transT, notransV, transV; + cham_uplo_t uplo; /* Check input arguments */ if ((side != ChamLeft) && (side != ChamRight)) { @@ -103,54 +104,58 @@ CUDA_zlarfb( cham_side_t side, cham_trans_t trans, // Comments assume H C. When forming H^H C, T gets transposed via transT. // W = C^H V - cublasZgemm( CUBLAS_HANDLE - chameleon_cublas_const(ChamConjTrans), chameleon_cublas_const(notransV), - N, K, M, - CUBLAS_SADDR(zone), C, LDC, - V, LDV, - CUBLAS_SADDR(zzero), WORK, LDWORK ); + rc = cublasZgemm( handle, + chameleon_cublas_const(ChamConjTrans), chameleon_cublas_const(notransV), + N, K, M, + CUBLAS_SADDR(zone), C, LDC, + V, LDV, + CUBLAS_SADDR(zzero), WORK, LDWORK ); + assert( rc == CUBLAS_STATUS_SUCCESS ); // W = W T^H = C^H V T^H CUDA_ztrmm( ChamRight, uplo, transT, ChamNonUnit, N, K, &zone, T, LDT, WORK, LDWORK, - CUBLAS_STREAM_VALUE ); + handle ); // C = C - V W^H = C - V T V^H C = (I - V T V^H) C = H C - cublasZgemm( CUBLAS_HANDLE - chameleon_cublas_const(notransV), chameleon_cublas_const(ChamConjTrans), - M, N, K, - CUBLAS_SADDR(mzone), V, LDV, - WORK, LDWORK, - CUBLAS_SADDR(zone), C, LDC ); + rc = cublasZgemm( handle, + chameleon_cublas_const(notransV), chameleon_cublas_const(ChamConjTrans), + M, N, K, + CUBLAS_SADDR(mzone), V, LDV, + WORK, LDWORK, + CUBLAS_SADDR(zone), C, LDC ); + assert( rc == CUBLAS_STATUS_SUCCESS ); } else { // Form C H or C H^H // Comments assume C H. When forming C H^H, T gets transposed via trans. // W = C V - cublasZgemm( CUBLAS_HANDLE - chameleon_cublas_const(ChamNoTrans), chameleon_cublas_const(notransV), - M, K, N, - CUBLAS_SADDR(zone), C, LDC, - V, LDV, - CUBLAS_SADDR(zzero), WORK, LDWORK ); + rc = cublasZgemm( handle, + chameleon_cublas_const(ChamNoTrans), chameleon_cublas_const(notransV), + M, K, N, + CUBLAS_SADDR(zone), C, LDC, + V, LDV, + CUBLAS_SADDR(zzero), WORK, LDWORK ); + assert( rc == CUBLAS_STATUS_SUCCESS ); // W = W T = C V T CUDA_ztrmm( ChamRight, uplo, trans, ChamNonUnit, M, K, &zone, T, LDT, WORK, LDWORK, - CUBLAS_STREAM_VALUE ); + handle ); // C = C - W V^H = C - C V T V^H = C (I - V T V^H) = C H - cublasZgemm( CUBLAS_HANDLE - chameleon_cublas_const(ChamNoTrans), chameleon_cublas_const(transV), - M, N, K, - CUBLAS_SADDR(mzone), WORK, LDWORK, - V, LDV, - CUBLAS_SADDR(zone), C, LDC ); + rc = cublasZgemm( handle, + chameleon_cublas_const(ChamNoTrans), chameleon_cublas_const(transV), + M, N, K, + CUBLAS_SADDR(mzone), WORK, LDWORK, + V, LDV, + CUBLAS_SADDR(zone), C, LDC ); + assert( rc == CUBLAS_STATUS_SUCCESS ); } return CHAMELEON_SUCCESS; } diff --git a/cudablas/compute/cuda_zparfb.c b/cudablas/compute/cuda_zparfb.c index 419e2b3e5bd1f46eafd59b2f38255fe53afda9c5..476c8b7faef0e0bf6b3a6bb387b15d130682c9d3 100644 --- a/cudablas/compute/cuda_zparfb.c +++ b/cudablas/compute/cuda_zparfb.c @@ -146,7 +146,7 @@ CUDA_zparfb( cham_side_t side, cham_trans_t trans, const cuDoubleComplex *V, int LDV, const cuDoubleComplex *T, int LDT, cuDoubleComplex *WORK, int LWORK, - CUBLAS_STREAM_PARAM ) + cublasHandle_t handle ) { #if defined(PRECISION_z) || defined(PRECISION_c) cuDoubleComplex zzero = make_cuDoubleComplex(0.0, 0.0); @@ -159,14 +159,12 @@ CUDA_zparfb( cham_side_t side, cham_trans_t trans, #endif /* defined(PRECISION_z) || defined(PRECISION_c) */ cuDoubleComplex *workW, *workC, *workV; - int ldW, ldC, ldV; - int j; - cham_trans_t transW; - cham_trans_t transA2; - int wssize = 0; - int wrsize = 0; - - CUBLAS_GET_STREAM; + cublasStatus_t rc; + cudaStream_t stream; + int j, ldW, ldC, ldV; + cham_trans_t transW, transA2; + int wssize = 0; + int wrsize = 0; /* Check input arguments */ if ((side != ChamLeft) && (side != ChamRight)) { @@ -219,6 +217,8 @@ CUDA_zparfb( cham_side_t side, cham_trans_t trans, return CHAMELEON_SUCCESS; } + cublasGetStream( handle, &stream ); + if (direct == ChamDirForward) { if (side == ChamLeft) { @@ -307,12 +307,13 @@ CUDA_zparfb( cham_side_t side, cham_trans_t trans, transW = storev == ChamColumnwise ? ChamConjTrans : ChamNoTrans; transA2 = storev == ChamColumnwise ? ChamNoTrans : ChamConjTrans; - cublasZgemm( CUBLAS_HANDLE - chameleon_cublas_const(transW), chameleon_cublas_const(ChamNoTrans), - K, N1, M2, - CUBLAS_SADDR(zone), workV /* M2*K */, ldV, - A2 /* M2*N2 */, LDA2, - CUBLAS_SADDR(zone), workW /* K *N2 */, ldW ); + rc = cublasZgemm( handle, + chameleon_cublas_const(transW), chameleon_cublas_const(ChamNoTrans), + K, N1, M2, + CUBLAS_SADDR(zone), workV /* M2*K */, ldV, + A2 /* M2*N2 */, LDA2, + CUBLAS_SADDR(zone), workW /* K *N2 */, ldW ); + assert( rc == CUBLAS_STATUS_SUCCESS ); if ( workC == NULL ) { /* W = op(T) * W */ @@ -320,48 +321,52 @@ CUDA_zparfb( cham_side_t side, cham_trans_t trans, K, N2, &zone, T, LDT, workW, ldW, - CUBLAS_STREAM_VALUE ); + handle ); /* A1 = A1 - W = A1 - op(T) * W */ for(j = 0; j < N1; j++) { - cublasZaxpy( CUBLAS_HANDLE - K, CUBLAS_SADDR(mzone), - workW + ldW * j, 1, - A1 + LDA1 * j, 1 ); + rc = cublasZaxpy( handle, + K, CUBLAS_SADDR(mzone), + workW + ldW * j, 1, + A1 + LDA1 * j, 1 ); + assert( rc == CUBLAS_STATUS_SUCCESS ); } /* A2 = A2 - op(V) * W */ - cublasZgemm( CUBLAS_HANDLE - chameleon_cublas_const(transA2), chameleon_cublas_const(ChamNoTrans), - M2, N2, K, - CUBLAS_SADDR(mzone), workV /* M2 * K */, ldV, - workW /* K * N2 */, ldW, - CUBLAS_SADDR(zone), A2 /* M2 * N2 */, LDA2 ); + rc = cublasZgemm( handle, + chameleon_cublas_const(transA2), chameleon_cublas_const(ChamNoTrans), + M2, N2, K, + CUBLAS_SADDR(mzone), workV /* M2 * K */, ldV, + workW /* K * N2 */, ldW, + CUBLAS_SADDR(zone), A2 /* M2 * N2 */, LDA2 ); + assert( rc == CUBLAS_STATUS_SUCCESS ); } else { /* Wc = V * op(T) */ - cublasZgemm( CUBLAS_HANDLE - chameleon_cublas_const(transA2), chameleon_cublas_const(trans), - M2, K, K, - CUBLAS_SADDR(zone), workV, ldV, - T, LDT, - CUBLAS_SADDR(zzero), workC, ldC ); + rc = cublasZgemm( handle, + chameleon_cublas_const(transA2), chameleon_cublas_const(trans), + M2, K, K, + CUBLAS_SADDR(zone), workV, ldV, + T, LDT, + CUBLAS_SADDR(zzero), workC, ldC ); /* A1 = A1 - opt(T) * W */ - cublasZgemm( CUBLAS_HANDLE - chameleon_cublas_const(trans), chameleon_cublas_const(ChamNoTrans), - K, N1, K, - CUBLAS_SADDR(mzone), T, LDT, - workW, ldW, - CUBLAS_SADDR(zone), A1, LDA1 ); + rc = cublasZgemm( handle, + chameleon_cublas_const(trans), chameleon_cublas_const(ChamNoTrans), + K, N1, K, + CUBLAS_SADDR(mzone), T, LDT, + workW, ldW, + CUBLAS_SADDR(zone), A1, LDA1 ); + assert( rc == CUBLAS_STATUS_SUCCESS ); /* A2 = A2 - Wc * W */ - cublasZgemm( CUBLAS_HANDLE - chameleon_cublas_const(ChamNoTrans), chameleon_cublas_const(ChamNoTrans), - M2, N2, K, - CUBLAS_SADDR(mzone), workC, ldC, - workW, ldW, - CUBLAS_SADDR(zone), A2, LDA2 ); + rc = cublasZgemm( handle, + chameleon_cublas_const(ChamNoTrans), chameleon_cublas_const(ChamNoTrans), + M2, N2, K, + CUBLAS_SADDR(mzone), workC, ldC, + workW, ldW, + CUBLAS_SADDR(zone), A2, LDA2 ); + assert( rc == CUBLAS_STATUS_SUCCESS ); } } else { @@ -450,12 +455,13 @@ CUDA_zparfb( cham_side_t side, cham_trans_t trans, transW = storev == ChamColumnwise ? ChamNoTrans : ChamConjTrans; transA2 = storev == ChamColumnwise ? ChamConjTrans : ChamNoTrans; - cublasZgemm(CUBLAS_HANDLE - chameleon_cublas_const(ChamNoTrans), chameleon_cublas_const(transW), - M1, K, N2, - CUBLAS_SADDR(zone), A2 /* M1*N2 */, LDA2, - workV /* K *N2 */, ldV, - CUBLAS_SADDR(zone), workW /* M1*K */, ldW); + rc = cublasZgemm( handle, + chameleon_cublas_const(ChamNoTrans), chameleon_cublas_const(transW), + M1, K, N2, + CUBLAS_SADDR(zone), A2 /* M1*N2 */, LDA2, + workV /* K *N2 */, ldV, + CUBLAS_SADDR(zone), workW /* M1*K */, ldW); + assert( rc == CUBLAS_STATUS_SUCCESS ); if ( workC == NULL ) { /* W = W * op(T) */ @@ -463,48 +469,53 @@ CUDA_zparfb( cham_side_t side, cham_trans_t trans, M2, K, &zone, T, LDT, workW, ldW, - CUBLAS_STREAM_VALUE ); + handle ); /* A1 = A1 - W = A1 - W * op(T) */ for(j = 0; j < K; j++) { - cublasZaxpy( CUBLAS_HANDLE - M1, CUBLAS_SADDR(mzone), - workW + ldW * j, 1, - A1 + LDA1 * j, 1 ); + rc = cublasZaxpy( handle, + M1, CUBLAS_SADDR(mzone), + workW + ldW * j, 1, + A1 + LDA1 * j, 1 ); + assert( rc == CUBLAS_STATUS_SUCCESS ); } /* A2 = A2 - W * op(V) */ - cublasZgemm(CUBLAS_HANDLE - chameleon_cublas_const(ChamNoTrans), chameleon_cublas_const(transA2), - M2, N2, K, - CUBLAS_SADDR(mzone), workW /* M2*K */, ldW, - workV /* K *N2 */, ldV, - CUBLAS_SADDR(zone), A2 /* M2*N2 */, LDA2); + rc = cublasZgemm( handle, + chameleon_cublas_const(ChamNoTrans), chameleon_cublas_const(transA2), + M2, N2, K, + CUBLAS_SADDR(mzone), workW /* M2*K */, ldW, + workV /* K *N2 */, ldV, + CUBLAS_SADDR(zone), A2 /* M2*N2 */, LDA2); + assert( rc == CUBLAS_STATUS_SUCCESS ); } else { /* A1 = A1 - W * opt(T) */ - cublasZgemm( CUBLAS_HANDLE - chameleon_cublas_const(ChamNoTrans), chameleon_cublas_const(trans), - M1, K, K, - CUBLAS_SADDR(mzone), workW, ldW, - T, LDT, - CUBLAS_SADDR(zone), A1, LDA1 ); + rc = cublasZgemm( handle, + chameleon_cublas_const(ChamNoTrans), chameleon_cublas_const(trans), + M1, K, K, + CUBLAS_SADDR(mzone), workW, ldW, + T, LDT, + CUBLAS_SADDR(zone), A1, LDA1 ); + assert( rc == CUBLAS_STATUS_SUCCESS ); /* Wc = op(T) * V */ - cublasZgemm( CUBLAS_HANDLE - chameleon_cublas_const(trans), chameleon_cublas_const(transA2), - K, N2, K, - CUBLAS_SADDR(zone), T, LDT, - workV, ldV, - CUBLAS_SADDR(zzero), workC, ldC ); + rc = cublasZgemm( handle, + chameleon_cublas_const(trans), chameleon_cublas_const(transA2), + K, N2, K, + CUBLAS_SADDR(zone), T, LDT, + workV, ldV, + CUBLAS_SADDR(zzero), workC, ldC ); + assert( rc == CUBLAS_STATUS_SUCCESS ); /* A2 = A2 - W * Wc */ - cublasZgemm( CUBLAS_HANDLE - chameleon_cublas_const(ChamNoTrans), chameleon_cublas_const(ChamNoTrans), - M2, N2, K, - CUBLAS_SADDR(mzone), workW, ldW, - workC, ldC, - CUBLAS_SADDR(zone), A2, LDA2 ); + rc = cublasZgemm( handle, + chameleon_cublas_const(ChamNoTrans), chameleon_cublas_const(ChamNoTrans), + M2, N2, K, + CUBLAS_SADDR(mzone), workW, ldW, + workC, ldC, + CUBLAS_SADDR(zone), A2, LDA2 ); + assert( rc == CUBLAS_STATUS_SUCCESS ); } } } diff --git a/cudablas/compute/cuda_zsymm.c b/cudablas/compute/cuda_zsymm.c index 2ed61af73945442e9586c0c2b5a98fb909aecd2a..3dcfa14eb9b0eecae32a4d2df75e54382e0e4038 100644 --- a/cudablas/compute/cuda_zsymm.c +++ b/cudablas/compute/cuda_zsymm.c @@ -20,23 +20,26 @@ */ #include "cudablas.h" -int CUDA_zsymm( cham_side_t side, cham_uplo_t uplo, - int m, int n, - const cuDoubleComplex *alpha, - const cuDoubleComplex *A, int lda, - const cuDoubleComplex *B, int ldb, - const cuDoubleComplex *beta, - cuDoubleComplex *C, int ldc, - CUBLAS_STREAM_PARAM ) +int +CUDA_zsymm( cham_side_t side, cham_uplo_t uplo, + int m, int n, + const cuDoubleComplex *alpha, + const cuDoubleComplex *A, int lda, + const cuDoubleComplex *B, int ldb, + const cuDoubleComplex *beta, + cuDoubleComplex *C, int ldc, + cublasHandle_t handle ) { - cublasZsymm(CUBLAS_HANDLE - chameleon_cublas_const(side), chameleon_cublas_const(uplo), - m, n, - CUBLAS_VALUE(alpha), A, lda, - B, ldb, - CUBLAS_VALUE(beta), C, ldc); + cublasStatus_t rc; - assert( CUBLAS_STATUS_SUCCESS == cublasGetError() ); + rc = cublasZsymm( handle, + chameleon_cublas_const(side), chameleon_cublas_const(uplo), + m, n, + CUBLAS_VALUE(alpha), A, lda, + B, ldb, + CUBLAS_VALUE(beta), C, ldc ); + assert( rc == CUBLAS_STATUS_SUCCESS ); + (void)rc; return CHAMELEON_SUCCESS; } diff --git a/cudablas/compute/cuda_zsyr2k.c b/cudablas/compute/cuda_zsyr2k.c index 5c40f1d54fa8034218c9ccd9afd6c409e493186b..9bd078b38e7afd1c4494d7f668a8e3d4863361cc 100644 --- a/cudablas/compute/cuda_zsyr2k.c +++ b/cudablas/compute/cuda_zsyr2k.c @@ -20,24 +20,26 @@ */ #include "cudablas.h" -int CUDA_zsyr2k( - cham_uplo_t uplo, cham_trans_t trans, - int n, int k, - const cuDoubleComplex *alpha, - const cuDoubleComplex *A, int lda, - const cuDoubleComplex *B, int ldb, - const cuDoubleComplex *beta, - cuDoubleComplex *C, int ldc, - CUBLAS_STREAM_PARAM) +int +CUDA_zsyr2k( cham_uplo_t uplo, cham_trans_t trans, + int n, int k, + const cuDoubleComplex *alpha, + const cuDoubleComplex *A, int lda, + const cuDoubleComplex *B, int ldb, + const cuDoubleComplex *beta, + cuDoubleComplex *C, int ldc, + cublasHandle_t handle ) { - cublasZsyr2k(CUBLAS_HANDLE - chameleon_cublas_const(uplo), chameleon_cublas_const(trans), - n, k, - CUBLAS_VALUE(alpha), A, lda, - B, ldb, - CUBLAS_VALUE(beta), C, ldc); + cublasStatus_t rc; - assert( CUBLAS_STATUS_SUCCESS == cublasGetError() ); + rc = cublasZsyr2k( handle, + chameleon_cublas_const(uplo), chameleon_cublas_const(trans), + n, k, + CUBLAS_VALUE(alpha), A, lda, + B, ldb, + CUBLAS_VALUE(beta), C, ldc ); + assert( rc == CUBLAS_STATUS_SUCCESS ); + (void)rc; return CHAMELEON_SUCCESS; } diff --git a/cudablas/compute/cuda_zsyrk.c b/cudablas/compute/cuda_zsyrk.c index b2a61353c747a430b887a661f588f502d713fe79..cdf966546a2a2d7229766146c7000c5c34c47e29 100644 --- a/cudablas/compute/cuda_zsyrk.c +++ b/cudablas/compute/cuda_zsyrk.c @@ -20,21 +20,24 @@ */ #include "cudablas.h" -int CUDA_zsyrk(cham_uplo_t uplo, cham_trans_t trans, - int n, int k, - const cuDoubleComplex *alpha, - const cuDoubleComplex *A, int lda, - const cuDoubleComplex *beta, - cuDoubleComplex *C, int ldc, - CUBLAS_STREAM_PARAM) +int +CUDA_zsyrk( cham_uplo_t uplo, cham_trans_t trans, + int n, int k, + const cuDoubleComplex *alpha, + const cuDoubleComplex *A, int lda, + const cuDoubleComplex *beta, + cuDoubleComplex *B, int ldb, + cublasHandle_t handle ) { - cublasZsyrk(CUBLAS_HANDLE - chameleon_cublas_const(uplo), chameleon_cublas_const(trans), - n, k, - CUBLAS_VALUE(alpha), A, lda, - CUBLAS_VALUE(beta), C, ldc); + cublasStatus_t rc; - assert( CUBLAS_STATUS_SUCCESS == cublasGetError() ); + rc = cublasZsyrk( handle, + chameleon_cublas_const(uplo), chameleon_cublas_const(trans), + n, k, + CUBLAS_VALUE(alpha), A, lda, + CUBLAS_VALUE(beta), B, ldb ); + assert( rc == CUBLAS_STATUS_SUCCESS ); + (void)rc; return CHAMELEON_SUCCESS; } diff --git a/cudablas/compute/cuda_ztpmlqt.c b/cudablas/compute/cuda_ztpmlqt.c index c10a2cb01f817e8e493e140545ed3661877b8be3..58c613a756fccccde789eb01f4b7a9be5f62fe52 100644 --- a/cudablas/compute/cuda_ztpmlqt.c +++ b/cudablas/compute/cuda_ztpmlqt.c @@ -142,7 +142,7 @@ CUDA_ztpmlqt( cham_side_t side, cham_trans_t trans, cuDoubleComplex *A, int LDA, cuDoubleComplex *B, int LDB, cuDoubleComplex *WORK, int lwork, - CUBLAS_STREAM_PARAM ) + cublasHandle_t handle ) { int m1, n1; @@ -166,14 +166,14 @@ CUDA_ztpmlqt( cham_side_t side, cham_trans_t trans, CUDA_ztsmlq( side, trans, m1, n1, M, N, K, IB, A, LDA, B, LDB, V, LDV, T, LDT, WORK, lwork, - CUBLAS_STREAM_VALUE ); + handle ); } /* TT case */ else if( L == N ) { CUDA_zttmlq( side, trans, m1, n1, M, N, K, IB, A, LDA, B, LDB, V, LDV, T, LDT, WORK, lwork, - CUBLAS_STREAM_VALUE ); + handle ); } else { cudablas_error(-6, "TPMLQT not available on GPU for general cases yet\n" ); diff --git a/cudablas/compute/cuda_ztpmqrt.c b/cudablas/compute/cuda_ztpmqrt.c index bfbc286335af56acad55dab892ff763ab0fc7af5..ddcbc1b522e8b6968e6c6ffca000907c6951ae0a 100644 --- a/cudablas/compute/cuda_ztpmqrt.c +++ b/cudablas/compute/cuda_ztpmqrt.c @@ -144,7 +144,7 @@ CUDA_ztpmqrt( cham_side_t side, cham_trans_t trans, cuDoubleComplex *A, int LDA, cuDoubleComplex *B, int LDB, cuDoubleComplex *WORK, int lwork, - CUBLAS_STREAM_PARAM ) + cublasHandle_t handle ) { int m1, n1; @@ -168,14 +168,14 @@ CUDA_ztpmqrt( cham_side_t side, cham_trans_t trans, CUDA_ztsmqr( side, trans, m1, n1, M, N, K, IB, A, LDA, B, LDB, V, LDV, T, LDT, WORK, lwork, - CUBLAS_STREAM_VALUE ); + handle ); } /* TT case */ else if( L == M ) { CUDA_zttmqr( side, trans, m1, n1, M, N, K, IB, A, LDA, B, LDB, V, LDV, T, LDT, WORK, lwork, - CUBLAS_STREAM_VALUE ); + handle ); } else { cudablas_error(-6, "TPMQRT not available on GPU for general cases yet\n" ); diff --git a/cudablas/compute/cuda_ztrmm.c b/cudablas/compute/cuda_ztrmm.c index 67e7300973ca48a890d29f5c901a5f570e98a500..f177a09312030ef9497570120ab584c6cdae6b05 100644 --- a/cudablas/compute/cuda_ztrmm.c +++ b/cudablas/compute/cuda_ztrmm.c @@ -20,27 +20,27 @@ */ #include "cudablas.h" -int CUDA_ztrmm( - cham_side_t side, cham_uplo_t uplo, - cham_trans_t transa, cham_diag_t diag, - int m, int n, - const cuDoubleComplex *alpha, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *B, int ldb, - CUBLAS_STREAM_PARAM) +int +CUDA_ztrmm( cham_side_t side, cham_uplo_t uplo, + cham_trans_t transa, cham_diag_t diag, + int m, int n, + const cuDoubleComplex *alpha, + const cuDoubleComplex *A, int lda, + cuDoubleComplex *B, int ldb, + cublasHandle_t handle ) { + cublasStatus_t rc; - cublasZtrmm( - CUBLAS_HANDLE - chameleon_cublas_const(side), chameleon_cublas_const(uplo), - chameleon_cublas_const(transa), chameleon_cublas_const(diag), - m, n, - CUBLAS_VALUE(alpha), A, lda, - B, ldb, - B, ldb); - - assert( CUBLAS_STATUS_SUCCESS == cublasGetError() ); + rc = cublasZtrmm( handle, + chameleon_cublas_const(side), chameleon_cublas_const(uplo), + chameleon_cublas_const(transa), chameleon_cublas_const(diag), + m, n, + CUBLAS_VALUE(alpha), A, lda, + B, ldb, + B, ldb ); + assert( rc == CUBLAS_STATUS_SUCCESS ); + (void)rc; return CHAMELEON_SUCCESS; } diff --git a/cudablas/compute/cuda_ztrsm.c b/cudablas/compute/cuda_ztrsm.c index 5f63d2464c679c239393040f66bb8f5ef3c1df9b..32d8c83c9d4f80bde4a76ddc93fb724246ef932e 100644 --- a/cudablas/compute/cuda_ztrsm.c +++ b/cudablas/compute/cuda_ztrsm.c @@ -20,22 +20,25 @@ */ #include "cudablas.h" -int CUDA_ztrsm(cham_side_t side, cham_uplo_t uplo, - cham_trans_t transa, cham_diag_t diag, - int m, int n, - const cuDoubleComplex *alpha, - const cuDoubleComplex *A, int lda, - cuDoubleComplex *B, int ldb, - CUBLAS_STREAM_PARAM) +int +CUDA_ztrsm( cham_side_t side, cham_uplo_t uplo, + cham_trans_t transa, cham_diag_t diag, + int m, int n, + const cuDoubleComplex *alpha, + const cuDoubleComplex *A, int lda, + cuDoubleComplex *B, int ldb, + cublasHandle_t handle ) { - cublasZtrsm(CUBLAS_HANDLE - chameleon_cublas_const(side), chameleon_cublas_const(uplo), - chameleon_cublas_const(transa), chameleon_cublas_const(diag), - m, n, - CUBLAS_VALUE(alpha), A, lda, - B, ldb); + cublasStatus_t rc; - assert( CUBLAS_STATUS_SUCCESS == cublasGetError() ); + rc = cublasZtrsm( handle, + chameleon_cublas_const(side), chameleon_cublas_const(uplo), + chameleon_cublas_const(transa), chameleon_cublas_const(diag), + m, n, + CUBLAS_VALUE(alpha), A, lda, + B, ldb ); + assert( rc == CUBLAS_STATUS_SUCCESS ); + (void)rc; return CHAMELEON_SUCCESS; } diff --git a/cudablas/compute/cuda_ztsmlq.c b/cudablas/compute/cuda_ztsmlq.c index 820518d5dcd7110d16038ae0cc8a9327c6c82029..63918f4b50c889448027d22534de2c4b31a0f0cc 100644 --- a/cudablas/compute/cuda_ztsmlq.c +++ b/cudablas/compute/cuda_ztsmlq.c @@ -20,17 +20,17 @@ */ #include "cudablas.h" -int CUDA_ztsmlq( - cham_side_t side, cham_trans_t trans, - int M1, int N1, - int M2, int N2, - int K, int IB, - cuDoubleComplex *A1, int LDA1, - cuDoubleComplex *A2, int LDA2, - const cuDoubleComplex *V, int LDV, - const cuDoubleComplex *T, int LDT, - cuDoubleComplex *WORK, int LWORK, - CUBLAS_STREAM_PARAM) +int +CUDA_ztsmlq( cham_side_t side, cham_trans_t trans, + int M1, int N1, + int M2, int N2, + int K, int IB, + cuDoubleComplex *A1, int LDA1, + cuDoubleComplex *A2, int LDA2, + const cuDoubleComplex *V, int LDV, + const cuDoubleComplex *T, int LDT, + cuDoubleComplex *WORK, int LWORK, + cublasHandle_t handle ) { int i, i1, i3; int kb; @@ -133,7 +133,7 @@ int CUDA_ztsmlq( A2, LDA2, V + i, LDV, T + LDT*i, LDT, - WORK, LWORK, CUBLAS_STREAM_VALUE ); + WORK, LWORK, handle ); } return CHAMELEON_SUCCESS; } diff --git a/cudablas/compute/cuda_ztsmqr.c b/cudablas/compute/cuda_ztsmqr.c index e3e576caf34576e5945df609fbc2418c8db4295e..61b05597243418c988b5cea60e914fc35773881f 100644 --- a/cudablas/compute/cuda_ztsmqr.c +++ b/cudablas/compute/cuda_ztsmqr.c @@ -20,17 +20,17 @@ */ #include "cudablas.h" -int CUDA_ztsmqr( - cham_side_t side, cham_trans_t trans, - int M1, int N1, - int M2, int N2, - int K, int IB, - cuDoubleComplex *A1, int LDA1, - cuDoubleComplex *A2, int LDA2, - const cuDoubleComplex *V, int LDV, - const cuDoubleComplex *T, int LDT, - cuDoubleComplex *WORK, int LWORK, - CUBLAS_STREAM_PARAM) +int +CUDA_ztsmqr( cham_side_t side, cham_trans_t trans, + int M1, int N1, + int M2, int N2, + int K, int IB, + cuDoubleComplex *A1, int LDA1, + cuDoubleComplex *A2, int LDA2, + const cuDoubleComplex *V, int LDV, + const cuDoubleComplex *T, int LDT, + cuDoubleComplex *WORK, int LWORK, + cublasHandle_t handle ) { int i, i1, i3; int NQ; @@ -135,7 +135,7 @@ int CUDA_ztsmqr( A2, LDA2, V + LDV*i, LDV, T + LDT*i, LDT, - WORK, LWORK, CUBLAS_STREAM_VALUE ); + WORK, LWORK, handle ); } return CHAMELEON_SUCCESS; } diff --git a/cudablas/compute/cuda_zttmlq.c b/cudablas/compute/cuda_zttmlq.c index c86f6611f34ab556c3bbbcdd7476937b6de13a34..4e6ac068a494b1afbf0ace0ce03b8595c9aa0fb1 100644 --- a/cudablas/compute/cuda_zttmlq.c +++ b/cudablas/compute/cuda_zttmlq.c @@ -20,17 +20,17 @@ */ #include "cudablas.h" -int CUDA_zttmlq( - cham_side_t side, cham_trans_t trans, - int M1, int N1, - int M2, int N2, - int K, int IB, - cuDoubleComplex *A1, int LDA1, - cuDoubleComplex *A2, int LDA2, - const cuDoubleComplex *V, int LDV, - const cuDoubleComplex *T, int LDT, - cuDoubleComplex *WORK, int LWORK, - CUBLAS_STREAM_PARAM) +int +CUDA_zttmlq( cham_side_t side, cham_trans_t trans, + int M1, int N1, + int M2, int N2, + int K, int IB, + cuDoubleComplex *A1, int LDA1, + cuDoubleComplex *A2, int LDA2, + const cuDoubleComplex *V, int LDV, + const cuDoubleComplex *T, int LDT, + cuDoubleComplex *WORK, int LWORK, + cublasHandle_t handle ) { int i, i1, i3; int kb, l; @@ -134,7 +134,7 @@ int CUDA_zttmlq( A2, LDA2, V + i, LDV, T + LDT * i, LDT, - WORK, LWORK, CUBLAS_STREAM_VALUE ); + WORK, LWORK, handle ); } return CHAMELEON_SUCCESS; } diff --git a/cudablas/compute/cuda_zttmqr.c b/cudablas/compute/cuda_zttmqr.c index 8b9036d5373213a51f25947a2b6f4cadec68ae3f..bd2a17569b35ca7930a0a262ee3a72d7bdf69ba3 100644 --- a/cudablas/compute/cuda_zttmqr.c +++ b/cudablas/compute/cuda_zttmqr.c @@ -20,17 +20,17 @@ */ #include "cudablas.h" -int CUDA_zttmqr( - cham_side_t side, cham_trans_t trans, - int M1, int N1, - int M2, int N2, - int K, int IB, - cuDoubleComplex *A1, int LDA1, - cuDoubleComplex *A2, int LDA2, - const cuDoubleComplex *V, int LDV, - const cuDoubleComplex *T, int LDT, - cuDoubleComplex *WORK, int LWORK, - CUBLAS_STREAM_PARAM) +int +CUDA_zttmqr( cham_side_t side, cham_trans_t trans, + int M1, int N1, + int M2, int N2, + int K, int IB, + cuDoubleComplex *A1, int LDA1, + cuDoubleComplex *A2, int LDA2, + const cuDoubleComplex *V, int LDV, + const cuDoubleComplex *T, int LDT, + cuDoubleComplex *WORK, int LWORK, + cublasHandle_t handle ) { int i, i1, i3; int NQ; @@ -135,7 +135,7 @@ int CUDA_zttmqr( A2, LDA2, V + LDV*i, LDV, T + LDT*i, LDT, - WORK, LWORK, CUBLAS_STREAM_VALUE ); + WORK, LWORK, handle ); } return CHAMELEON_SUCCESS; } diff --git a/cudablas/compute/cuda_zunmlqt.c b/cudablas/compute/cuda_zunmlqt.c index 5689bfa016095632377796c1d1b249f8d0550275..47a3b19806b6fd4dc4acab4db331e3ee734e1652 100644 --- a/cudablas/compute/cuda_zunmlqt.c +++ b/cudablas/compute/cuda_zunmlqt.c @@ -21,13 +21,13 @@ #include "cudablas.h" int -CUDA_zunmlqt(cham_side_t side, cham_trans_t trans, - int M, int N, int K, int IB, - const cuDoubleComplex *A, int LDA, - const cuDoubleComplex *T, int LDT, - cuDoubleComplex *C, int LDC, - cuDoubleComplex *WORK, int LDWORK, - CUBLAS_STREAM_PARAM ) +CUDA_zunmlqt( cham_side_t side, cham_trans_t trans, + int M, int N, int K, int IB, + const cuDoubleComplex *A, int LDA, + const cuDoubleComplex *T, int LDT, + cuDoubleComplex *C, int LDC, + cuDoubleComplex *WORK, int LDWORK, + cublasHandle_t handle ) { int i, kb; int i1, i3; @@ -122,7 +122,7 @@ CUDA_zunmlqt(cham_side_t side, cham_trans_t trans, A + LDA * i + i, LDA, T + LDT * i, LDT, C + LDC * jc + ic, LDC, - WORK, LDWORK, CUBLAS_STREAM_VALUE); + WORK, LDWORK, handle ); } return CHAMELEON_SUCCESS; } diff --git a/cudablas/compute/cuda_zunmqrt.c b/cudablas/compute/cuda_zunmqrt.c index a16d10a4792f0e6a472ccf2f77a53607a4d37a29..3864bd5ffe061a38f03531da9a544c79ee4316cc 100644 --- a/cudablas/compute/cuda_zunmqrt.c +++ b/cudablas/compute/cuda_zunmqrt.c @@ -21,13 +21,13 @@ #include "cudablas.h" int -CUDA_zunmqrt(cham_side_t side, cham_trans_t trans, - int M, int N, int K, int IB, - const cuDoubleComplex *A, int LDA, - const cuDoubleComplex *T, int LDT, - cuDoubleComplex *C, int LDC, - cuDoubleComplex *WORK, int LDWORK, - CUBLAS_STREAM_PARAM ) +CUDA_zunmqrt( cham_side_t side, cham_trans_t trans, + int M, int N, int K, int IB, + const cuDoubleComplex *A, int LDA, + const cuDoubleComplex *T, int LDT, + cuDoubleComplex *C, int LDC, + cuDoubleComplex *WORK, int LDWORK, + cublasHandle_t handle ) { int i, kb; int i1, i3; @@ -116,7 +116,7 @@ CUDA_zunmqrt(cham_side_t side, cham_trans_t trans, T + LDT * i, LDT, C + LDC * jc + ic, LDC, WORK, LDWORK, - CUBLAS_STREAM_VALUE ); + handle ); } return CHAMELEON_SUCCESS;