From fd75373a88f505b6bd018965d29c14b12c0aff54 Mon Sep 17 00:00:00 2001 From: Mathieu Faverge <mathieu.faverge@inria.fr> Date: Sun, 4 Dec 2016 22:44:51 +0000 Subject: [PATCH] Restore unmlq/unmqr with cuda support in the codelets --- runtime/starpu/codelets/codelet_zunmlq.c | 15 ++++++++++----- runtime/starpu/codelets/codelet_zunmqr.c | 15 ++++++++++----- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/runtime/starpu/codelets/codelet_zunmlq.c b/runtime/starpu/codelets/codelet_zunmlq.c index 61a9ae8a5..0f1c8a8bc 100644 --- a/runtime/starpu/codelets/codelet_zunmlq.c +++ b/runtime/starpu/codelets/codelet_zunmlq.c @@ -185,7 +185,7 @@ static void cl_zunmlq_cpu_func(void *descr[], void *cl_arg) A, lda, T, ldt, C, ldc, WORK, ldwork); } -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) static void cl_zunmlq_cuda_func(void *descr[], void *cl_arg) { MORSE_starpu_ws_t *d_work; @@ -207,19 +207,24 @@ static void cl_zunmlq_cuda_func(void *descr[], void *cl_arg) C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); WORK = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */ + stream = starpu_cuda_get_local_stream(); + cublasSetKernelStream( stream ); + CUDA_zunmlqt( side, trans, m, n, k, ib, - A, lda, T, ldt, C, ldc, WORK, ldwork ); + A, lda, T, ldt, C, ldc, WORK, ldwork, stream ); - cudaThreadSynchronize(); -} +#ifndef STARPU_CUDA_ASYNC + cudaStreamSynchronize( stream ); #endif +} +#endif /* defined(CHAMELEON_USE_CUDA) */ #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) CODELETS(zunmlq, 4, cl_zunmlq_cpu_func, cl_zunmlq_cuda_func, 0) #else CODELETS_CPU(zunmlq, 4, cl_zunmlq_cpu_func) diff --git a/runtime/starpu/codelets/codelet_zunmqr.c b/runtime/starpu/codelets/codelet_zunmqr.c index e4e278fad..300f51d9b 100644 --- a/runtime/starpu/codelets/codelet_zunmqr.c +++ b/runtime/starpu/codelets/codelet_zunmqr.c @@ -211,7 +211,7 @@ static void cl_zunmqr_cpu_func(void *descr[], void *cl_arg) A, lda, T, ldt, C, ldc, WORK, ldwork); } -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) static void cl_zunmqr_cuda_func(void *descr[], void *cl_arg) { MORSE_starpu_ws_t *d_work; @@ -233,19 +233,24 @@ static void cl_zunmqr_cuda_func(void *descr[], void *cl_arg) C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]); WORK = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */ + stream = starpu_cuda_get_local_stream(); + cublasSetKernelStream( stream ); + CUDA_zunmqrt( side, trans, m, n, k, ib, - A, lda, T, ldt, C, ldc, WORK, ldwork ); + A, lda, T, ldt, C, ldc, WORK, ldwork, stream ); - cudaThreadSynchronize(); -} +#ifndef STARPU_CUDA_ASYNC + cudaStreamSynchronize( stream ); #endif +} +#endif /* defined(CHAMELEON_USE_CUDA) */ #endif /* !defined(CHAMELEON_SIMULATION) */ /* * Codelet definition */ -#if defined(CHAMELEON_USE_MAGMA) +#if defined(CHAMELEON_USE_CUDA) CODELETS(zunmqr, 4, cl_zunmqr_cpu_func, cl_zunmqr_cuda_func, 0) #else CODELETS_CPU(zunmqr, 4, cl_zunmqr_cpu_func) -- GitLab