From 48c9546337af55d9eb36ef53f5a23c6f6c9a1b3b Mon Sep 17 00:00:00 2001 From: Mathieu Faverge <mathieu.faverge@inria.fr> Date: Wed, 29 Mar 2023 20:49:18 +0200 Subject: [PATCH] cudablas: Use the returned value from cuda calls to return a better information --- cudablas/compute/cuda_zlarfb.c | 7 ++++++- cudablas/compute/cuda_zparfb.c | 16 +++++++++++++++- 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/cudablas/compute/cuda_zlarfb.c b/cudablas/compute/cuda_zlarfb.c index 5039d0265..8175d1944 100644 --- a/cudablas/compute/cuda_zlarfb.c +++ b/cudablas/compute/cuda_zlarfb.c @@ -45,6 +45,7 @@ CUDA_zlarfb( cham_side_t side, cham_trans_t trans, cublasStatus_t rc; cham_trans_t transT, notransV, transV; cham_uplo_t uplo; + int info = 0; /* Check input arguments */ if ((side != ChamLeft) && (side != ChamRight)) { @@ -111,6 +112,7 @@ CUDA_zlarfb( cham_side_t side, cham_trans_t trans, V, LDV, CUBLAS_SADDR(zzero), WORK, LDWORK ); assert( rc == CUBLAS_STATUS_SUCCESS ); + info += (rc == CUBLAS_STATUS_SUCCESS) ? 0 : 1; // W = W T^H = C^H V T^H CUDA_ztrmm( ChamRight, uplo, transT, ChamNonUnit, @@ -127,6 +129,7 @@ CUDA_zlarfb( cham_side_t side, cham_trans_t trans, WORK, LDWORK, CUBLAS_SADDR(zone), C, LDC ); assert( rc == CUBLAS_STATUS_SUCCESS ); + info += (rc == CUBLAS_STATUS_SUCCESS) ? 0 : 1; } else { // Form C H or C H^H @@ -140,6 +143,7 @@ CUDA_zlarfb( cham_side_t side, cham_trans_t trans, V, LDV, CUBLAS_SADDR(zzero), WORK, LDWORK ); assert( rc == CUBLAS_STATUS_SUCCESS ); + info += (rc == CUBLAS_STATUS_SUCCESS) ? 0 : 1; // W = W T = C V T CUDA_ztrmm( ChamRight, uplo, trans, ChamNonUnit, @@ -156,7 +160,8 @@ CUDA_zlarfb( cham_side_t side, cham_trans_t trans, V, LDV, CUBLAS_SADDR(zone), C, LDC ); assert( rc == CUBLAS_STATUS_SUCCESS ); + info += (rc == CUBLAS_STATUS_SUCCESS) ? 0 : 1; } - return (rc == CUBLAS_STATUS_SUCCESS) ? CHAMELEON_SUCCESS : CHAMELEON_ERR_UNEXPECTED; + return (info == 0) ? CHAMELEON_SUCCESS : CHAMELEON_ERR_UNEXPECTED; } diff --git a/cudablas/compute/cuda_zparfb.c b/cudablas/compute/cuda_zparfb.c index 39aaa7586..8977eb6a1 100644 --- a/cudablas/compute/cuda_zparfb.c +++ b/cudablas/compute/cuda_zparfb.c @@ -165,6 +165,7 @@ CUDA_zparfb( cham_side_t side, cham_trans_t trans, cham_trans_t transW, transA2; int wssize = 0; int wrsize = 0; + int info = 0; /* Check input arguments */ if ((side != ChamLeft) && (side != ChamRight)) { @@ -315,6 +316,7 @@ CUDA_zparfb( cham_side_t side, cham_trans_t trans, A2 /* M2*N2 */, LDA2, CUBLAS_SADDR(zone), workW /* K *N2 */, ldW ); assert( rc == CUBLAS_STATUS_SUCCESS ); + info += (rc == CUBLAS_STATUS_SUCCESS) ? 0 : 1; if ( workC == NULL ) { /* W = op(T) * W */ @@ -331,6 +333,7 @@ CUDA_zparfb( cham_side_t side, cham_trans_t trans, workW + ldW * j, 1, A1 + LDA1 * j, 1 ); assert( rc == CUBLAS_STATUS_SUCCESS ); + info += (rc == CUBLAS_STATUS_SUCCESS) ? 0 : 1; } /* A2 = A2 - op(V) * W */ @@ -341,6 +344,7 @@ CUDA_zparfb( cham_side_t side, cham_trans_t trans, workW /* K * N2 */, ldW, CUBLAS_SADDR(zone), A2 /* M2 * N2 */, LDA2 ); assert( rc == CUBLAS_STATUS_SUCCESS ); + info += (rc == CUBLAS_STATUS_SUCCESS) ? 0 : 1; } else { /* Wc = V * op(T) */ @@ -350,6 +354,8 @@ CUDA_zparfb( cham_side_t side, cham_trans_t trans, CUBLAS_SADDR(zone), workV, ldV, T, LDT, CUBLAS_SADDR(zzero), workC, ldC ); + assert( rc == CUBLAS_STATUS_SUCCESS ); + info += (rc == CUBLAS_STATUS_SUCCESS) ? 0 : 1; /* A1 = A1 - opt(T) * W */ rc = cublasZgemm( handle, @@ -359,6 +365,7 @@ CUDA_zparfb( cham_side_t side, cham_trans_t trans, workW, ldW, CUBLAS_SADDR(zone), A1, LDA1 ); assert( rc == CUBLAS_STATUS_SUCCESS ); + info += (rc == CUBLAS_STATUS_SUCCESS) ? 0 : 1; /* A2 = A2 - Wc * W */ rc = cublasZgemm( handle, @@ -368,6 +375,7 @@ CUDA_zparfb( cham_side_t side, cham_trans_t trans, workW, ldW, CUBLAS_SADDR(zone), A2, LDA2 ); assert( rc == CUBLAS_STATUS_SUCCESS ); + info += (rc == CUBLAS_STATUS_SUCCESS) ? 0 : 1; } } else { @@ -463,6 +471,7 @@ CUDA_zparfb( cham_side_t side, cham_trans_t trans, workV /* K *N2 */, ldV, CUBLAS_SADDR(zone), workW /* M1*K */, ldW); assert( rc == CUBLAS_STATUS_SUCCESS ); + info += (rc == CUBLAS_STATUS_SUCCESS) ? 0 : 1; if ( workC == NULL ) { /* W = W * op(T) */ @@ -479,6 +488,7 @@ CUDA_zparfb( cham_side_t side, cham_trans_t trans, workW + ldW * j, 1, A1 + LDA1 * j, 1 ); assert( rc == CUBLAS_STATUS_SUCCESS ); + info += (rc == CUBLAS_STATUS_SUCCESS) ? 0 : 1; } /* A2 = A2 - W * op(V) */ @@ -489,6 +499,7 @@ CUDA_zparfb( cham_side_t side, cham_trans_t trans, workV /* K *N2 */, ldV, CUBLAS_SADDR(zone), A2 /* M2*N2 */, LDA2); assert( rc == CUBLAS_STATUS_SUCCESS ); + info += (rc == CUBLAS_STATUS_SUCCESS) ? 0 : 1; } else { /* A1 = A1 - W * opt(T) */ @@ -499,6 +510,7 @@ CUDA_zparfb( cham_side_t side, cham_trans_t trans, T, LDT, CUBLAS_SADDR(zone), A1, LDA1 ); assert( rc == CUBLAS_STATUS_SUCCESS ); + info += (rc == CUBLAS_STATUS_SUCCESS) ? 0 : 1; /* Wc = op(T) * V */ rc = cublasZgemm( handle, @@ -508,6 +520,7 @@ CUDA_zparfb( cham_side_t side, cham_trans_t trans, workV, ldV, CUBLAS_SADDR(zzero), workC, ldC ); assert( rc == CUBLAS_STATUS_SUCCESS ); + info += (rc == CUBLAS_STATUS_SUCCESS) ? 0 : 1; /* A2 = A2 - W * Wc */ rc = cublasZgemm( handle, @@ -517,6 +530,7 @@ CUDA_zparfb( cham_side_t side, cham_trans_t trans, workC, ldC, CUBLAS_SADDR(zone), A2, LDA2 ); assert( rc == CUBLAS_STATUS_SUCCESS ); + info += (rc == CUBLAS_STATUS_SUCCESS) ? 0 : 1; } } } @@ -525,5 +539,5 @@ CUDA_zparfb( cham_side_t side, cham_trans_t trans, return CHAMELEON_ERR_NOT_SUPPORTED; } - return (rc == CUBLAS_STATUS_SUCCESS) ? CHAMELEON_SUCCESS : CHAMELEON_ERR_UNEXPECTED; + return (info == 0) ? CHAMELEON_SUCCESS : CHAMELEON_ERR_UNEXPECTED; } -- GitLab