diff --git a/cudablas/compute/cuda_zlarfb.c b/cudablas/compute/cuda_zlarfb.c index 5039d0265ca88a197568f7aa597384f44b212f08..8175d1944d74d50ec90f8d59be655c48c3f87942 100644 --- a/cudablas/compute/cuda_zlarfb.c +++ b/cudablas/compute/cuda_zlarfb.c @@ -45,6 +45,7 @@ CUDA_zlarfb( cham_side_t side, cham_trans_t trans, cublasStatus_t rc; cham_trans_t transT, notransV, transV; cham_uplo_t uplo; + int info = 0; /* Check input arguments */ if ((side != ChamLeft) && (side != ChamRight)) { @@ -111,6 +112,7 @@ CUDA_zlarfb( cham_side_t side, cham_trans_t trans, V, LDV, CUBLAS_SADDR(zzero), WORK, LDWORK ); assert( rc == CUBLAS_STATUS_SUCCESS ); + info += (rc == CUBLAS_STATUS_SUCCESS) ? 0 : 1; // W = W T^H = C^H V T^H CUDA_ztrmm( ChamRight, uplo, transT, ChamNonUnit, @@ -127,6 +129,7 @@ CUDA_zlarfb( cham_side_t side, cham_trans_t trans, WORK, LDWORK, CUBLAS_SADDR(zone), C, LDC ); assert( rc == CUBLAS_STATUS_SUCCESS ); + info += (rc == CUBLAS_STATUS_SUCCESS) ? 0 : 1; } else { // Form C H or C H^H @@ -140,6 +143,7 @@ CUDA_zlarfb( cham_side_t side, cham_trans_t trans, V, LDV, CUBLAS_SADDR(zzero), WORK, LDWORK ); assert( rc == CUBLAS_STATUS_SUCCESS ); + info += (rc == CUBLAS_STATUS_SUCCESS) ? 0 : 1; // W = W T = C V T CUDA_ztrmm( ChamRight, uplo, trans, ChamNonUnit, @@ -156,7 +160,8 @@ CUDA_zlarfb( cham_side_t side, cham_trans_t trans, V, LDV, CUBLAS_SADDR(zone), C, LDC ); assert( rc == CUBLAS_STATUS_SUCCESS ); + info += (rc == CUBLAS_STATUS_SUCCESS) ? 0 : 1; } - return (rc == CUBLAS_STATUS_SUCCESS) ? CHAMELEON_SUCCESS : CHAMELEON_ERR_UNEXPECTED; + return (info == 0) ? CHAMELEON_SUCCESS : CHAMELEON_ERR_UNEXPECTED; } diff --git a/cudablas/compute/cuda_zparfb.c b/cudablas/compute/cuda_zparfb.c index 39aaa7586df4ec0ca904997dd7e9fca7e28c3106..8977eb6a169c64e98da17f6c82edc0195733c6c8 100644 --- a/cudablas/compute/cuda_zparfb.c +++ b/cudablas/compute/cuda_zparfb.c @@ -165,6 +165,7 @@ CUDA_zparfb( cham_side_t side, cham_trans_t trans, cham_trans_t transW, transA2; int wssize = 0; int wrsize = 0; + int info = 0; /* Check input arguments */ if ((side != ChamLeft) && (side != ChamRight)) { @@ -315,6 +316,7 @@ CUDA_zparfb( cham_side_t side, cham_trans_t trans, A2 /* M2*N2 */, LDA2, CUBLAS_SADDR(zone), workW /* K *N2 */, ldW ); assert( rc == CUBLAS_STATUS_SUCCESS ); + info += (rc == CUBLAS_STATUS_SUCCESS) ? 0 : 1; if ( workC == NULL ) { /* W = op(T) * W */ @@ -331,6 +333,7 @@ CUDA_zparfb( cham_side_t side, cham_trans_t trans, workW + ldW * j, 1, A1 + LDA1 * j, 1 ); assert( rc == CUBLAS_STATUS_SUCCESS ); + info += (rc == CUBLAS_STATUS_SUCCESS) ? 0 : 1; } /* A2 = A2 - op(V) * W */ @@ -341,6 +344,7 @@ CUDA_zparfb( cham_side_t side, cham_trans_t trans, workW /* K * N2 */, ldW, CUBLAS_SADDR(zone), A2 /* M2 * N2 */, LDA2 ); assert( rc == CUBLAS_STATUS_SUCCESS ); + info += (rc == CUBLAS_STATUS_SUCCESS) ? 0 : 1; } else { /* Wc = V * op(T) */ @@ -350,6 +354,8 @@ CUDA_zparfb( cham_side_t side, cham_trans_t trans, CUBLAS_SADDR(zone), workV, ldV, T, LDT, CUBLAS_SADDR(zzero), workC, ldC ); + assert( rc == CUBLAS_STATUS_SUCCESS ); + info += (rc == CUBLAS_STATUS_SUCCESS) ? 0 : 1; /* A1 = A1 - opt(T) * W */ rc = cublasZgemm( handle, @@ -359,6 +365,7 @@ CUDA_zparfb( cham_side_t side, cham_trans_t trans, workW, ldW, CUBLAS_SADDR(zone), A1, LDA1 ); assert( rc == CUBLAS_STATUS_SUCCESS ); + info += (rc == CUBLAS_STATUS_SUCCESS) ? 0 : 1; /* A2 = A2 - Wc * W */ rc = cublasZgemm( handle, @@ -368,6 +375,7 @@ CUDA_zparfb( cham_side_t side, cham_trans_t trans, workW, ldW, CUBLAS_SADDR(zone), A2, LDA2 ); assert( rc == CUBLAS_STATUS_SUCCESS ); + info += (rc == CUBLAS_STATUS_SUCCESS) ? 0 : 1; } } else { @@ -463,6 +471,7 @@ CUDA_zparfb( cham_side_t side, cham_trans_t trans, workV /* K *N2 */, ldV, CUBLAS_SADDR(zone), workW /* M1*K */, ldW); assert( rc == CUBLAS_STATUS_SUCCESS ); + info += (rc == CUBLAS_STATUS_SUCCESS) ? 0 : 1; if ( workC == NULL ) { /* W = W * op(T) */ @@ -479,6 +488,7 @@ CUDA_zparfb( cham_side_t side, cham_trans_t trans, workW + ldW * j, 1, A1 + LDA1 * j, 1 ); assert( rc == CUBLAS_STATUS_SUCCESS ); + info += (rc == CUBLAS_STATUS_SUCCESS) ? 0 : 1; } /* A2 = A2 - W * op(V) */ @@ -489,6 +499,7 @@ CUDA_zparfb( cham_side_t side, cham_trans_t trans, workV /* K *N2 */, ldV, CUBLAS_SADDR(zone), A2 /* M2*N2 */, LDA2); assert( rc == CUBLAS_STATUS_SUCCESS ); + info += (rc == CUBLAS_STATUS_SUCCESS) ? 0 : 1; } else { /* A1 = A1 - W * opt(T) */ @@ -499,6 +510,7 @@ CUDA_zparfb( cham_side_t side, cham_trans_t trans, T, LDT, CUBLAS_SADDR(zone), A1, LDA1 ); assert( rc == CUBLAS_STATUS_SUCCESS ); + info += (rc == CUBLAS_STATUS_SUCCESS) ? 0 : 1; /* Wc = op(T) * V */ rc = cublasZgemm( handle, @@ -508,6 +520,7 @@ CUDA_zparfb( cham_side_t side, cham_trans_t trans, workV, ldV, CUBLAS_SADDR(zzero), workC, ldC ); assert( rc == CUBLAS_STATUS_SUCCESS ); + info += (rc == CUBLAS_STATUS_SUCCESS) ? 0 : 1; /* A2 = A2 - W * Wc */ rc = cublasZgemm( handle, @@ -517,6 +530,7 @@ CUDA_zparfb( cham_side_t side, cham_trans_t trans, workC, ldC, CUBLAS_SADDR(zone), A2, LDA2 ); assert( rc == CUBLAS_STATUS_SUCCESS ); + info += (rc == CUBLAS_STATUS_SUCCESS) ? 0 : 1; } } } @@ -525,5 +539,5 @@ CUDA_zparfb( cham_side_t side, cham_trans_t trans, return CHAMELEON_ERR_NOT_SUPPORTED; } - return (rc == CUBLAS_STATUS_SUCCESS) ? CHAMELEON_SUCCESS : CHAMELEON_ERR_UNEXPECTED; + return (info == 0) ? CHAMELEON_SUCCESS : CHAMELEON_ERR_UNEXPECTED; }