Mentions légales du service

Skip to content
Snippets Groups Projects
Commit b3bd4295 authored by Mathieu Faverge
Browse files

All LQ/QR routines are working on GPUs again. Let's hope I will not have to fix them one more time.

parent 6b5343f8
No related branches found
No related tags found
No related merge requests found
...@@ -77,7 +77,7 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, ...@@ -77,7 +77,7 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans,
*/ */
ws_worker = A->nb * ib; ws_worker = A->nb * ib;
#if defined(CHAMELEON_USE_MAGMA) #if defined(CHAMELEON_USE_CUDA)
/* Worker space /* Worker space
* *
* zunmlq = A->nb * ib * zunmlq = A->nb * ib
...@@ -118,7 +118,7 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, ...@@ -118,7 +118,7 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans,
MorseUpper, tempkmin, tempNn, A->nb, MorseUpper, tempkmin, tempNn, A->nb,
A(k, N), ldak, A(k, N), ldak,
DIAG(k, N), ldak ); DIAG(k, N), ldak );
#if defined(CHAMELEON_USE_MAGMA) #if defined(CHAMELEON_USE_CUDA)
MORSE_TASK_zlaset( MORSE_TASK_zlaset(
&options, &options,
MorseLower, tempkmin, tempNn, MorseLower, tempkmin, tempNn,
...@@ -230,7 +230,7 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, ...@@ -230,7 +230,7 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans,
MorseUpper, tempkmin, tempNn, A->nb, MorseUpper, tempkmin, tempNn, A->nb,
A(k, N), ldak, A(k, N), ldak,
DIAG(k, N), ldak ); DIAG(k, N), ldak );
#if defined(CHAMELEON_USE_MAGMA) #if defined(CHAMELEON_USE_CUDA)
MORSE_TASK_zlaset( MORSE_TASK_zlaset(
&options, &options,
MorseLower, tempkmin, tempNn, MorseLower, tempkmin, tempNn,
...@@ -307,7 +307,7 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, ...@@ -307,7 +307,7 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans,
MorseUpper, tempkmin, tempNn, A->nb, MorseUpper, tempkmin, tempNn, A->nb,
A(k, N), ldak, A(k, N), ldak,
DIAG(k, N), ldak ); DIAG(k, N), ldak );
#if defined(CHAMELEON_USE_MAGMA) #if defined(CHAMELEON_USE_CUDA)
MORSE_TASK_zlaset( MORSE_TASK_zlaset(
&options, &options,
MorseLower, tempkmin, tempNn, MorseLower, tempkmin, tempNn,
...@@ -346,7 +346,7 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans, ...@@ -346,7 +346,7 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans,
MorseUpper, tempkmin, tempNn, A->nb, MorseUpper, tempkmin, tempNn, A->nb,
A(k, N), ldak, A(k, N), ldak,
DIAG(k, N), ldak ); DIAG(k, N), ldak );
#if defined(CHAMELEON_USE_MAGMA) #if defined(CHAMELEON_USE_CUDA)
MORSE_TASK_zlaset( MORSE_TASK_zlaset(
&options, &options,
MorseLower, tempkmin, tempNn, MorseLower, tempkmin, tempNn,
......
...@@ -90,15 +90,22 @@ CUDA_zlarfb(MORSE_enum side, MORSE_enum trans, ...@@ -90,15 +90,22 @@ CUDA_zlarfb(MORSE_enum side, MORSE_enum trans,
else else
uplo = MorseLower; uplo = MorseLower;
if (storev == MorseColumnwise) {
notransV = MorseNoTrans;
transV = MorseConjTrans;
}
else {
notransV = MorseConjTrans;
transV = MorseNoTrans;
}
if ( side == MorseLeft ) { if ( side == MorseLeft ) {
// Form H C or H^H C // Form H C or H^H C
// Comments assume H C. When forming H^H C, T gets transposed via transT. // Comments assume H C. When forming H^H C, T gets transposed via transT.
transV = (storev == MorseColumnwise) ? MorseNoTrans : MorseConjTrans;
// W = C^H V // W = C^H V
cublasZgemm( CUBLAS_HANDLE cublasZgemm( CUBLAS_HANDLE
morse_lapack_const(MorseConjTrans), morse_lapack_const(transV), morse_lapack_const(MorseConjTrans), morse_lapack_const(notransV),
N, K, M, N, K, M,
CUBLAS_SADDR(zone), C, LDC, CUBLAS_SADDR(zone), C, LDC,
V, LDV, V, LDV,
...@@ -114,7 +121,7 @@ CUDA_zlarfb(MORSE_enum side, MORSE_enum trans, ...@@ -114,7 +121,7 @@ CUDA_zlarfb(MORSE_enum side, MORSE_enum trans,
// C = C - V W^H = C - V T V^H C = (I - V T V^H) C = H C // C = C - V W^H = C - V T V^H C = (I - V T V^H) C = H C
cublasZgemm( CUBLAS_HANDLE cublasZgemm( CUBLAS_HANDLE
morse_lapack_const(transV), morse_lapack_const(MorseConjTrans), morse_lapack_const(notransV), morse_lapack_const(MorseConjTrans),
M, N, K, M, N, K,
CUBLAS_SADDR(mzone), V, LDV, CUBLAS_SADDR(mzone), V, LDV,
WORK, LDWORK, WORK, LDWORK,
...@@ -124,11 +131,9 @@ CUDA_zlarfb(MORSE_enum side, MORSE_enum trans, ...@@ -124,11 +131,9 @@ CUDA_zlarfb(MORSE_enum side, MORSE_enum trans,
// Form C H or C H^H // Form C H or C H^H
// Comments assume C H. When forming C H^H, T gets transposed via trans. // Comments assume C H. When forming C H^H, T gets transposed via trans.
transV = (storev == MorseColumnwise) ? MorseConjTrans : MorseNoTrans;
// W = C V // W = C V
cublasZgemm( CUBLAS_HANDLE cublasZgemm( CUBLAS_HANDLE
morse_lapack_const(MorseNoTrans), morse_lapack_const(transV), morse_lapack_const(MorseNoTrans), morse_lapack_const(notransV),
M, K, N, M, K, N,
CUBLAS_SADDR(zone), C, LDC, CUBLAS_SADDR(zone), C, LDC,
V, LDV, V, LDV,
......
...@@ -266,8 +266,4 @@ static void cl_ztsmlq_cuda_func(void *descr[], void *cl_arg) ...@@ -266,8 +266,4 @@ static void cl_ztsmlq_cuda_func(void *descr[], void *cl_arg)
/* /*
* Codelet definition * Codelet definition
*/ */
#if defined(CHAMELEON_USE_CUDA)
CODELETS(ztsmlq, 5, cl_ztsmlq_cpu_func, cl_ztsmlq_cuda_func, STARPU_CUDA_ASYNC) CODELETS(ztsmlq, 5, cl_ztsmlq_cpu_func, cl_ztsmlq_cuda_func, STARPU_CUDA_ASYNC)
#else
CODELETS_CPU(ztsmlq, 5, cl_ztsmlq_cpu_func)
#endif
...@@ -299,8 +299,4 @@ static void cl_ztsmqr_cuda_func(void *descr[], void *cl_arg) ...@@ -299,8 +299,4 @@ static void cl_ztsmqr_cuda_func(void *descr[], void *cl_arg)
/* /*
* Codelet definition * Codelet definition
*/ */
#if defined(CHAMELEON_USE_CUDA)
CODELETS(ztsmqr, 5, cl_ztsmqr_cpu_func, cl_ztsmqr_cuda_func, STARPU_CUDA_ASYNC) CODELETS(ztsmqr, 5, cl_ztsmqr_cpu_func, cl_ztsmqr_cuda_func, STARPU_CUDA_ASYNC)
#else
CODELETS_CPU(ztsmqr, 5, cl_ztsmqr_cpu_func)
#endif
...@@ -225,8 +225,4 @@ static void cl_zunmlq_cuda_func(void *descr[], void *cl_arg) ...@@ -225,8 +225,4 @@ static void cl_zunmlq_cuda_func(void *descr[], void *cl_arg)
/* /*
* Codelet definition * Codelet definition
*/ */
#if defined(CHAMELEON_USE_CUDA) CODELETS(zunmlq, 4, cl_zunmlq_cpu_func, cl_zunmlq_cuda_func, STARPU_CUDA_ASYNC);
CODELETS(zunmlq, 4, cl_zunmlq_cpu_func, cl_zunmlq_cuda_func, 0)
#else
CODELETS_CPU(zunmlq, 4, cl_zunmlq_cpu_func)
#endif
...@@ -251,8 +251,4 @@ static void cl_zunmqr_cuda_func(void *descr[], void *cl_arg) ...@@ -251,8 +251,4 @@ static void cl_zunmqr_cuda_func(void *descr[], void *cl_arg)
/* /*
* Codelet definition * Codelet definition
*/ */
#if defined(CHAMELEON_USE_CUDA) CODELETS(zunmqr, 4, cl_zunmqr_cpu_func, cl_zunmqr_cuda_func, STARPU_CUDA_ASYNC)
CODELETS(zunmqr, 4, cl_zunmqr_cpu_func, cl_zunmqr_cuda_func, 0)
#else
CODELETS_CPU(zunmqr, 4, cl_zunmqr_cpu_func)
#endif
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment