diff --git a/gpucublas/compute/cuda_dlag2h.cu b/gpucublas/compute/cuda_dlag2h.cu index 8136c248fd9ff2c02a4910b251f282acdbe4cc77..000fc7454655c9a493ee970179fc28192f27f04b 100644 --- a/gpucublas/compute/cuda_dlag2h.cu +++ b/gpucublas/compute/cuda_dlag2h.cu @@ -133,6 +133,7 @@ CUDA_dlag2h( int m, int n, cublasHandle_t handle ) { cudaStream_t stream; + cudaError_t err; double rmax; if ( m < 0 ) { @@ -166,6 +167,13 @@ CUDA_dlag2h( int m, int n, cuda_dlag2h_kernel<<< grid, threads, 0, stream >>>( m, n, A, lda, HA, ldha, rmax ); + err = cudaGetLastError(); + if ( err != cudaSuccess ) + { + fprintf( stderr, "CUDA_dlag2h failed to launch CUDA kernel %s\n", cudaGetErrorString(err) ); + return CHAMELEON_ERR_UNEXPECTED; + } + return 0; } @@ -261,6 +269,7 @@ CUDA_hlag2d( int m, int n, cublasHandle_t handle ) { cudaStream_t stream; + cudaError_t err; if ( m < 0 ) { return -1; @@ -286,5 +295,12 @@ CUDA_hlag2d( int m, int n, cublasGetStream( handle, &stream ); cuda_hlag2d_kernel<<< grid, threads, 0, stream >>> ( m, n, HA, ldha, A, lda ); + err = cudaGetLastError(); + if ( err != cudaSuccess ) + { + fprintf( stderr, "CUDA_hlag2d failed to launch CUDA kernel %s\n", cudaGetErrorString(err) ); + return CHAMELEON_ERR_UNEXPECTED; + } + return 0; } diff --git a/gpucublas/compute/cuda_zlag2c.cu b/gpucublas/compute/cuda_zlag2c.cu index cf3f35062fbe336c1c8f72119c659ba99e4693da..8b1fdfe7105a13cdb983a60de735d29d790eb38b 100644 --- a/gpucublas/compute/cuda_zlag2c.cu +++ b/gpucublas/compute/cuda_zlag2c.cu @@ -152,6 +152,7 @@ CUDA_zlag2c( int m, int n, cublasHandle_t handle ) { cudaStream_t stream; + cudaError_t err; double rmax; if ( m < 0 ) { @@ -180,6 +181,13 @@ CUDA_zlag2c( int m, int n, cuda_zlag2c_kernel<<< grid, threads, 0, stream >>>( m, n, A, lda, SA, ldsa, rmax ); + err = cudaGetLastError(); + if ( err != cudaSuccess ) + { + fprintf( stderr, "CUDA_zlag2c failed to launch CUDA kernel %s\n", cudaGetErrorString(err) ); + return CHAMELEON_ERR_UNEXPECTED; + } + return 0; } @@ -275,6 +283,7 @@ CUDA_clag2z( int m, int n, cublasHandle_t handle ) { cudaStream_t stream; + cudaError_t err; if ( m < 0 ) { return -1; @@ -300,5 +309,12 @@ CUDA_clag2z( int m, int n, cublasGetStream( handle, &stream ); cuda_clag2z_kernel<<< grid, threads, 0, stream >>> ( m, n, SA, ldsa, A, lda ); + err = cudaGetLastError(); + if ( err != cudaSuccess ) + { + fprintf( stderr, "CUDA_clag2z failed to launch CUDA kernel %s\n", cudaGetErrorString(err) ); + return CHAMELEON_ERR_UNEXPECTED; + } + return 0; }