Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 1ea98937 authored by Mathieu Faverge's avatar Mathieu Faverge
Browse files

Merge branch 'fix/zgersum-GPU' into 'master'

Fix/zgersum gpu

See merge request !341
parents 5155ebde 034f7634
No related branches found
No related tags found
1 merge request: !341 Fix/zgersum gpu
...@@ -26,39 +26,42 @@ ...@@ -26,39 +26,42 @@
#include "runtime_codelet_z.h" #include "runtime_codelet_z.h"
#if !defined(CHAMELEON_SIMULATION) #if !defined(CHAMELEON_SIMULATION)
static void cl_zgeadd_cpu_func(void *descr[], void *cl_arg) static void
cl_zgeadd_cpu_func( void *descr[], void *cl_arg )
{ {
cham_trans_t trans; cham_trans_t trans;
int M; int M;
int N; int N;
CHAMELEON_Complex64_t alpha; CHAMELEON_Complex64_t alpha;
CHAM_tile_t *tileA; CHAM_tile_t *tileA;
CHAMELEON_Complex64_t beta; CHAMELEON_Complex64_t beta;
CHAM_tile_t *tileB; CHAM_tile_t *tileB;
tileA = cti_interface_get(descr[0]); tileA = cti_interface_get(descr[0]);
tileB = cti_interface_get(descr[1]); tileB = cti_interface_get(descr[1]);
starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &beta); starpu_codelet_unpack_args( cl_arg, &trans, &M, &N, &alpha, &beta );
TCORE_zgeadd(trans, M, N, alpha, tileA, beta, tileB); TCORE_zgeadd( trans, M, N, alpha, tileA, beta, tileB );
return; return;
} }
#ifdef CHAMELEON_USE_CUBLAS #if defined(CHAMELEON_USE_CUDA)
static void cl_zgeadd_cuda_func(void *descr[], void *cl_arg) static void
cl_zgeadd_cuda_func( void *descr[], void *cl_arg )
{ {
cublasHandle_t handle = starpu_cublas_get_local_handle(); cublasHandle_t handle = starpu_cublas_get_local_handle();
cham_trans_t trans; cham_trans_t trans;
int M; int M;
int N; int N;
cuDoubleComplex alpha; cuDoubleComplex alpha;
CHAM_tile_t *tileA; CHAM_tile_t *tileA;
cuDoubleComplex beta; cuDoubleComplex beta;
CHAM_tile_t *tileB; CHAM_tile_t *tileB;
tileA = cti_interface_get(descr[0]); tileA = cti_interface_get( descr[0] );
tileB = cti_interface_get(descr[1]); tileB = cti_interface_get( descr[1] );
starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &beta ); starpu_codelet_unpack_args( cl_arg, &trans, &M, &N, &alpha, &beta );
CUDA_zgeadd( trans, M, N, CUDA_zgeadd( trans, M, N,
&alpha, tileA->mat, tileA->ld, &alpha, tileA->mat, tileA->ld,
...@@ -67,17 +70,13 @@ static void cl_zgeadd_cuda_func(void *descr[], void *cl_arg) ...@@ -67,17 +70,13 @@ static void cl_zgeadd_cuda_func(void *descr[], void *cl_arg)
return; return;
} }
#endif /* defined(CHAMELEON_USE_CUBLAS) */ #endif /* defined(CHAMELEON_USE_CUDA) */
#endif /* !defined(CHAMELEON_SIMULATION) */ #endif /* !defined(CHAMELEON_SIMULATION) */
/* /*
* Codelet definition * Codelet definition
*/ */
#if defined(CHAMELEON_USE_CUBLAS) CODELETS( zgeadd, cl_zgeadd_cpu_func, cl_zgeadd_cuda_func, STARPU_CUDA_ASYNC );
CODELETS(zgeadd, cl_zgeadd_cpu_func, cl_zgeadd_cuda_func, STARPU_CUDA_ASYNC)
#else
CODELETS_CPU(zgeadd, cl_zgeadd_cpu_func)
#endif
void INSERT_TASK_zgeadd( const RUNTIME_option_t *options, void INSERT_TASK_zgeadd( const RUNTIME_option_t *options,
cham_trans_t trans, int m, int n, int nb, cham_trans_t trans, int m, int n, int nb,
......
...@@ -22,7 +22,8 @@ ...@@ -22,7 +22,8 @@
#include "runtime_codelet_z.h" #include "runtime_codelet_z.h"
#if !defined(CHAMELEON_SIMULATION) #if !defined(CHAMELEON_SIMULATION)
static void cl_zgersum_redux_cpu_func(void *descr[], void *cl_arg) static void
cl_zgersum_redux_cpu_func( void *descr[], void *cl_arg )
{ {
CHAM_tile_t *tileA; CHAM_tile_t *tileA;
CHAM_tile_t *tileB; CHAM_tile_t *tileB;
...@@ -38,13 +39,14 @@ static void cl_zgersum_redux_cpu_func(void *descr[], void *cl_arg) ...@@ -38,13 +39,14 @@ static void cl_zgersum_redux_cpu_func(void *descr[], void *cl_arg)
return; return;
} }
#ifdef CHAMELEON_USE_CUBLAS #if defined(CHAMELEON_USE_CUDA)
static void cl_zgersum_redux_cuda_func(void *descr[], void *cl_arg) static void
cl_zgersum_redux_cuda_func( void *descr[], void *cl_arg )
{ {
cublasHandle_t handle = starpu_cublas_get_local_handle(); cublasHandle_t handle = starpu_cublas_get_local_handle();
CHAMELEON_Complex64_t zone = 1.; CHAMELEON_Complex64_t zone = 1.;
CHAM_tile_t *tileA; CHAM_tile_t *tileA;
CHAM_tile_t *tileB; CHAM_tile_t *tileB;
tileA = cti_interface_get(descr[0]); tileA = cti_interface_get(descr[0]);
tileB = cti_interface_get(descr[1]); tileB = cti_interface_get(descr[1]);
...@@ -59,17 +61,13 @@ static void cl_zgersum_redux_cuda_func(void *descr[], void *cl_arg) ...@@ -59,17 +61,13 @@ static void cl_zgersum_redux_cuda_func(void *descr[], void *cl_arg)
return; return;
} }
#endif /* defined(CHAMELEON_USE_CUBLAS) */ #endif /* defined(CHAMELEON_USE_CUDA) */
#endif /* !defined(CHAMELEON_SIMULATION) */ #endif /* !defined(CHAMELEON_SIMULATION) */
/* /*
* Codelet definition * Codelet definition
*/ */
#if defined(CHAMELEON_USE_CUBLAS) CODELETS( zgersum_redux, cl_zgersum_redux_cpu_func, cl_zgersum_redux_cuda_func, STARPU_CUDA_ASYNC );
CODELETS(zgersum_redux, cl_zgersum_redux_cpu_func, cl_zgersum_redux_cuda_func, STARPU_CUDA_ASYNC)
#else
CODELETS_CPU(zgersum_redux, cl_zgersum_redux_cpu_func)
#endif
#if !defined(CHAMELEON_SIMULATION) #if !defined(CHAMELEON_SIMULATION)
static void static void
...@@ -83,12 +81,29 @@ cl_zgersum_init_cpu_func( void *descr[], void *cl_arg ) ...@@ -83,12 +81,29 @@ cl_zgersum_init_cpu_func( void *descr[], void *cl_arg )
(void)cl_arg; (void)cl_arg;
} }
#if defined(CHAMELEON_USE_CUDA)
static void
cl_zgersum_init_cuda_func( void *descr[], void *cl_arg )
{
CHAM_tile_t *tileA;
cublasStatus_t rc;
tileA = cti_interface_get(descr[0]);
rc = cudaMemset2D( tileA->mat, tileA->ld * sizeof(CHAMELEON_Complex64_t), 0,
tileA->m * sizeof(CHAMELEON_Complex64_t), tileA->n );
assert( rc == CUBLAS_STATUS_SUCCESS );
(void)cl_arg;
}
#endif /* defined(CHAMELEON_USE_CUDA) */
#endif /* !defined(CHAMELEON_SIMULATION) */ #endif /* !defined(CHAMELEON_SIMULATION) */
/* /*
* Codelet definition * Codelet definition
*/ */
CODELETS_CPU( zgersum_init, cl_zgersum_init_cpu_func ); CODELETS( zgersum_init, cl_zgersum_init_cpu_func, cl_zgersum_init_cuda_func, STARPU_CUDA_ASYNC );
void void
RUNTIME_zgersum_set_methods( const CHAM_desc_t *A, int Am, int An ) RUNTIME_zgersum_set_methods( const CHAM_desc_t *A, int Am, int An )
...@@ -108,7 +123,7 @@ RUNTIME_zgersum_submit_tree( const RUNTIME_option_t *options, ...@@ -108,7 +123,7 @@ RUNTIME_zgersum_submit_tree( const RUNTIME_option_t *options,
starpu_mpi_redux_data_prio_tree( MPI_COMM_WORLD, starpu_mpi_redux_data_prio_tree( MPI_COMM_WORLD,
RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
options->priority + 1, options->priority + 1,
2 /* Arbre binaire */ ); 2 /* Binary tree */ );
#else #else
(void)options; (void)options;
(void)A; (void)A;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment