Mentions légales du service

Skip to content
Snippets Groups Projects
Commit b3bd4295 authored by Mathieu Faverge
Browse files

All LQ/QR routines are working on GPUs again. Let's hope I will not have to fix them one more time.

parent 6b5343f8
No related branches found
No related tags found
No related merge requests found
......@@ -77,7 +77,7 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans,
*/
ws_worker = A->nb * ib;
#if defined(CHAMELEON_USE_MAGMA)
#if defined(CHAMELEON_USE_CUDA)
/* Worker space
*
* zunmlq = A->nb * ib
......@@ -118,7 +118,7 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans,
MorseUpper, tempkmin, tempNn, A->nb,
A(k, N), ldak,
DIAG(k, N), ldak );
#if defined(CHAMELEON_USE_MAGMA)
#if defined(CHAMELEON_USE_CUDA)
MORSE_TASK_zlaset(
&options,
MorseLower, tempkmin, tempNn,
......@@ -230,7 +230,7 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans,
MorseUpper, tempkmin, tempNn, A->nb,
A(k, N), ldak,
DIAG(k, N), ldak );
#if defined(CHAMELEON_USE_MAGMA)
#if defined(CHAMELEON_USE_CUDA)
MORSE_TASK_zlaset(
&options,
MorseLower, tempkmin, tempNn,
......@@ -307,7 +307,7 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans,
MorseUpper, tempkmin, tempNn, A->nb,
A(k, N), ldak,
DIAG(k, N), ldak );
#if defined(CHAMELEON_USE_MAGMA)
#if defined(CHAMELEON_USE_CUDA)
MORSE_TASK_zlaset(
&options,
MorseLower, tempkmin, tempNn,
......@@ -346,7 +346,7 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans,
MorseUpper, tempkmin, tempNn, A->nb,
A(k, N), ldak,
DIAG(k, N), ldak );
#if defined(CHAMELEON_USE_MAGMA)
#if defined(CHAMELEON_USE_CUDA)
MORSE_TASK_zlaset(
&options,
MorseLower, tempkmin, tempNn,
......
......@@ -90,15 +90,22 @@ CUDA_zlarfb(MORSE_enum side, MORSE_enum trans,
else
uplo = MorseLower;
if (storev == MorseColumnwise) {
notransV = MorseNoTrans;
transV = MorseConjTrans;
}
else {
notransV = MorseConjTrans;
transV = MorseNoTrans;
}
if ( side == MorseLeft ) {
// Form H C or H^H C
// Comments assume H C. When forming H^H C, T gets transposed via transT.
transV = (storev == MorseColumnwise) ? MorseNoTrans : MorseConjTrans;
// W = C^H V
cublasZgemm( CUBLAS_HANDLE
morse_lapack_const(MorseConjTrans), morse_lapack_const(transV),
morse_lapack_const(MorseConjTrans), morse_lapack_const(notransV),
N, K, M,
CUBLAS_SADDR(zone), C, LDC,
V, LDV,
......@@ -114,7 +121,7 @@ CUDA_zlarfb(MORSE_enum side, MORSE_enum trans,
// C = C - V W^H = C - V T V^H C = (I - V T V^H) C = H C
cublasZgemm( CUBLAS_HANDLE
morse_lapack_const(transV), morse_lapack_const(MorseConjTrans),
morse_lapack_const(notransV), morse_lapack_const(MorseConjTrans),
M, N, K,
CUBLAS_SADDR(mzone), V, LDV,
WORK, LDWORK,
......@@ -124,11 +131,9 @@ CUDA_zlarfb(MORSE_enum side, MORSE_enum trans,
// Form C H or C H^H
// Comments assume C H. When forming C H^H, T gets transposed via trans.
transV = (storev == MorseColumnwise) ? MorseConjTrans : MorseNoTrans;
// W = C V
cublasZgemm( CUBLAS_HANDLE
morse_lapack_const(MorseNoTrans), morse_lapack_const(transV),
morse_lapack_const(MorseNoTrans), morse_lapack_const(notransV),
M, K, N,
CUBLAS_SADDR(zone), C, LDC,
V, LDV,
......
......@@ -266,8 +266,4 @@ static void cl_ztsmlq_cuda_func(void *descr[], void *cl_arg)
/*
* Codelet definition
*/
#if defined(CHAMELEON_USE_CUDA)
CODELETS(ztsmlq, 5, cl_ztsmlq_cpu_func, cl_ztsmlq_cuda_func, STARPU_CUDA_ASYNC)
#else
CODELETS_CPU(ztsmlq, 5, cl_ztsmlq_cpu_func)
#endif
......@@ -299,8 +299,4 @@ static void cl_ztsmqr_cuda_func(void *descr[], void *cl_arg)
/*
* Codelet definition
*/
#if defined(CHAMELEON_USE_CUDA)
CODELETS(ztsmqr, 5, cl_ztsmqr_cpu_func, cl_ztsmqr_cuda_func, STARPU_CUDA_ASYNC)
#else
CODELETS_CPU(ztsmqr, 5, cl_ztsmqr_cpu_func)
#endif
......@@ -225,8 +225,4 @@ static void cl_zunmlq_cuda_func(void *descr[], void *cl_arg)
/*
* Codelet definition
*/
#if defined(CHAMELEON_USE_CUDA)
CODELETS(zunmlq, 4, cl_zunmlq_cpu_func, cl_zunmlq_cuda_func, 0)
#else
CODELETS_CPU(zunmlq, 4, cl_zunmlq_cpu_func)
#endif
CODELETS(zunmlq, 4, cl_zunmlq_cpu_func, cl_zunmlq_cuda_func, STARPU_CUDA_ASYNC);
......@@ -251,8 +251,4 @@ static void cl_zunmqr_cuda_func(void *descr[], void *cl_arg)
/*
* Codelet definition
*/
#if defined(CHAMELEON_USE_CUDA)
CODELETS(zunmqr, 4, cl_zunmqr_cpu_func, cl_zunmqr_cuda_func, 0)
#else
CODELETS_CPU(zunmqr, 4, cl_zunmqr_cpu_func)
#endif
CODELETS(zunmqr, 4, cl_zunmqr_cpu_func, cl_zunmqr_cuda_func, STARPU_CUDA_ASYNC)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment