Mentions légales du service
Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
Chameleon
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container Registry
Operate
Environments
Terraform modules
Monitor
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
solverstack
Chameleon
Commits
1ea98937
Commit
1ea98937
authored
2 years ago
by
Mathieu Faverge
Browse files
Options
Downloads
Plain Diff
Merge branch 'fix/zgersum-GPU' into 'master'
Fix/zgersum gpu See merge request
!341
parents
5155ebde
034f7634
No related branches found
No related tags found
1 merge request
!341
Fix/zgersum gpu
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
runtime/starpu/codelets/codelet_zgeadd.c
+24
-25
24 additions, 25 deletions
runtime/starpu/codelets/codelet_zgeadd.c
runtime/starpu/codelets/codelet_zgersum.c
+30
-15
30 additions, 15 deletions
runtime/starpu/codelets/codelet_zgersum.c
with
54 additions
and
40 deletions
runtime/starpu/codelets/codelet_zgeadd.c
+
24
−
25
View file @
1ea98937
...
@@ -26,39 +26,42 @@
...
@@ -26,39 +26,42 @@
#include
"runtime_codelet_z.h"
#include
"runtime_codelet_z.h"
#if !defined(CHAMELEON_SIMULATION)
#if !defined(CHAMELEON_SIMULATION)
static
void
cl_zgeadd_cpu_func
(
void
*
descr
[],
void
*
cl_arg
)
static
void
cl_zgeadd_cpu_func
(
void
*
descr
[],
void
*
cl_arg
)
{
{
cham_trans_t
trans
;
cham_trans_t
trans
;
int
M
;
int
M
;
int
N
;
int
N
;
CHAMELEON_Complex64_t
alpha
;
CHAMELEON_Complex64_t
alpha
;
CHAM_tile_t
*
tileA
;
CHAM_tile_t
*
tileA
;
CHAMELEON_Complex64_t
beta
;
CHAMELEON_Complex64_t
beta
;
CHAM_tile_t
*
tileB
;
CHAM_tile_t
*
tileB
;
tileA
=
cti_interface_get
(
descr
[
0
]);
tileA
=
cti_interface_get
(
descr
[
0
]);
tileB
=
cti_interface_get
(
descr
[
1
]);
tileB
=
cti_interface_get
(
descr
[
1
]);
starpu_codelet_unpack_args
(
cl_arg
,
&
trans
,
&
M
,
&
N
,
&
alpha
,
&
beta
);
starpu_codelet_unpack_args
(
cl_arg
,
&
trans
,
&
M
,
&
N
,
&
alpha
,
&
beta
);
TCORE_zgeadd
(
trans
,
M
,
N
,
alpha
,
tileA
,
beta
,
tileB
);
TCORE_zgeadd
(
trans
,
M
,
N
,
alpha
,
tileA
,
beta
,
tileB
);
return
;
return
;
}
}
#ifdef CHAMELEON_USE_CUBLAS
#if defined(CHAMELEON_USE_CUDA)
static
void
cl_zgeadd_cuda_func
(
void
*
descr
[],
void
*
cl_arg
)
static
void
cl_zgeadd_cuda_func
(
void
*
descr
[],
void
*
cl_arg
)
{
{
cublasHandle_t
handle
=
starpu_cublas_get_local_handle
();
cublasHandle_t
handle
=
starpu_cublas_get_local_handle
();
cham_trans_t
trans
;
cham_trans_t
trans
;
int
M
;
int
M
;
int
N
;
int
N
;
cuDoubleComplex
alpha
;
cuDoubleComplex
alpha
;
CHAM_tile_t
*
tileA
;
CHAM_tile_t
*
tileA
;
cuDoubleComplex
beta
;
cuDoubleComplex
beta
;
CHAM_tile_t
*
tileB
;
CHAM_tile_t
*
tileB
;
tileA
=
cti_interface_get
(
descr
[
0
]);
tileA
=
cti_interface_get
(
descr
[
0
]
);
tileB
=
cti_interface_get
(
descr
[
1
]);
tileB
=
cti_interface_get
(
descr
[
1
]
);
starpu_codelet_unpack_args
(
cl_arg
,
&
trans
,
&
M
,
&
N
,
&
alpha
,
&
beta
);
starpu_codelet_unpack_args
(
cl_arg
,
&
trans
,
&
M
,
&
N
,
&
alpha
,
&
beta
);
CUDA_zgeadd
(
trans
,
M
,
N
,
CUDA_zgeadd
(
trans
,
M
,
N
,
&
alpha
,
tileA
->
mat
,
tileA
->
ld
,
&
alpha
,
tileA
->
mat
,
tileA
->
ld
,
...
@@ -67,17 +70,13 @@ static void cl_zgeadd_cuda_func(void *descr[], void *cl_arg)
...
@@ -67,17 +70,13 @@ static void cl_zgeadd_cuda_func(void *descr[], void *cl_arg)
return
;
return
;
}
}
#endif
/* defined(CHAMELEON_USE_CUBLAS) */
#endif
/* defined(CHAMELEON_USE_CUDA) */
#endif
/* !defined(CHAMELEON_SIMULATION) */
#endif
/* !defined(CHAMELEON_SIMULATION) */
/*
/*
* Codelet definition
* Codelet definition
*/
*/
#if defined(CHAMELEON_USE_CUBLAS)
CODELETS
(
zgeadd
,
cl_zgeadd_cpu_func
,
cl_zgeadd_cuda_func
,
STARPU_CUDA_ASYNC
);
CODELETS
(
zgeadd
,
cl_zgeadd_cpu_func
,
cl_zgeadd_cuda_func
,
STARPU_CUDA_ASYNC
)
#else
CODELETS_CPU
(
zgeadd
,
cl_zgeadd_cpu_func
)
#endif
void
INSERT_TASK_zgeadd
(
const
RUNTIME_option_t
*
options
,
void
INSERT_TASK_zgeadd
(
const
RUNTIME_option_t
*
options
,
cham_trans_t
trans
,
int
m
,
int
n
,
int
nb
,
cham_trans_t
trans
,
int
m
,
int
n
,
int
nb
,
...
...
This diff is collapsed.
Click to expand it.
runtime/starpu/codelets/codelet_zgersum.c
+
30
−
15
View file @
1ea98937
...
@@ -22,7 +22,8 @@
...
@@ -22,7 +22,8 @@
#include
"runtime_codelet_z.h"
#include
"runtime_codelet_z.h"
#if !defined(CHAMELEON_SIMULATION)
#if !defined(CHAMELEON_SIMULATION)
static
void
cl_zgersum_redux_cpu_func
(
void
*
descr
[],
void
*
cl_arg
)
static
void
cl_zgersum_redux_cpu_func
(
void
*
descr
[],
void
*
cl_arg
)
{
{
CHAM_tile_t
*
tileA
;
CHAM_tile_t
*
tileA
;
CHAM_tile_t
*
tileB
;
CHAM_tile_t
*
tileB
;
...
@@ -38,13 +39,14 @@ static void cl_zgersum_redux_cpu_func(void *descr[], void *cl_arg)
...
@@ -38,13 +39,14 @@ static void cl_zgersum_redux_cpu_func(void *descr[], void *cl_arg)
return
;
return
;
}
}
#ifdef CHAMELEON_USE_CUBLAS
#if defined(CHAMELEON_USE_CUDA)
static
void
cl_zgersum_redux_cuda_func
(
void
*
descr
[],
void
*
cl_arg
)
static
void
cl_zgersum_redux_cuda_func
(
void
*
descr
[],
void
*
cl_arg
)
{
{
cublasHandle_t
handle
=
starpu_cublas_get_local_handle
();
cublasHandle_t
handle
=
starpu_cublas_get_local_handle
();
CHAMELEON_Complex64_t
zone
=
1
.;
CHAMELEON_Complex64_t
zone
=
1
.;
CHAM_tile_t
*
tileA
;
CHAM_tile_t
*
tileA
;
CHAM_tile_t
*
tileB
;
CHAM_tile_t
*
tileB
;
tileA
=
cti_interface_get
(
descr
[
0
]);
tileA
=
cti_interface_get
(
descr
[
0
]);
tileB
=
cti_interface_get
(
descr
[
1
]);
tileB
=
cti_interface_get
(
descr
[
1
]);
...
@@ -59,17 +61,13 @@ static void cl_zgersum_redux_cuda_func(void *descr[], void *cl_arg)
...
@@ -59,17 +61,13 @@ static void cl_zgersum_redux_cuda_func(void *descr[], void *cl_arg)
return
;
return
;
}
}
#endif
/* defined(CHAMELEON_USE_CUBLAS) */
#endif
/* defined(CHAMELEON_USE_CUDA) */
#endif
/* !defined(CHAMELEON_SIMULATION) */
#endif
/* !defined(CHAMELEON_SIMULATION) */
/*
/*
* Codelet definition
* Codelet definition
*/
*/
#if defined(CHAMELEON_USE_CUBLAS)
CODELETS
(
zgersum_redux
,
cl_zgersum_redux_cpu_func
,
cl_zgersum_redux_cuda_func
,
STARPU_CUDA_ASYNC
);
CODELETS
(
zgersum_redux
,
cl_zgersum_redux_cpu_func
,
cl_zgersum_redux_cuda_func
,
STARPU_CUDA_ASYNC
)
#else
CODELETS_CPU
(
zgersum_redux
,
cl_zgersum_redux_cpu_func
)
#endif
#if !defined(CHAMELEON_SIMULATION)
#if !defined(CHAMELEON_SIMULATION)
static
void
static
void
...
@@ -83,12 +81,29 @@ cl_zgersum_init_cpu_func( void *descr[], void *cl_arg )
...
@@ -83,12 +81,29 @@ cl_zgersum_init_cpu_func( void *descr[], void *cl_arg )
(
void
)
cl_arg
;
(
void
)
cl_arg
;
}
}
#if defined(CHAMELEON_USE_CUDA)
static
void
cl_zgersum_init_cuda_func
(
void
*
descr
[],
void
*
cl_arg
)
{
CHAM_tile_t
*
tileA
;
cublasStatus_t
rc
;
tileA
=
cti_interface_get
(
descr
[
0
]);
rc
=
cudaMemset2D
(
tileA
->
mat
,
tileA
->
ld
*
sizeof
(
CHAMELEON_Complex64_t
),
0
,
tileA
->
m
*
sizeof
(
CHAMELEON_Complex64_t
),
tileA
->
n
);
assert
(
rc
==
CUBLAS_STATUS_SUCCESS
);
(
void
)
cl_arg
;
}
#endif
/* defined(CHAMELEON_USE_CUDA) */
#endif
/* !defined(CHAMELEON_SIMULATION) */
#endif
/* !defined(CHAMELEON_SIMULATION) */
/*
/*
* Codelet definition
* Codelet definition
*/
*/
CODELETS
_CPU
(
zgersum_init
,
cl_zgersum_init_cpu_func
);
CODELETS
(
zgersum_init
,
cl_zgersum_init_cpu_func
,
cl_zgersum_init_cuda_func
,
STARPU_CUDA_ASYNC
);
void
void
RUNTIME_zgersum_set_methods
(
const
CHAM_desc_t
*
A
,
int
Am
,
int
An
)
RUNTIME_zgersum_set_methods
(
const
CHAM_desc_t
*
A
,
int
Am
,
int
An
)
...
@@ -108,7 +123,7 @@ RUNTIME_zgersum_submit_tree( const RUNTIME_option_t *options,
...
@@ -108,7 +123,7 @@ RUNTIME_zgersum_submit_tree( const RUNTIME_option_t *options,
starpu_mpi_redux_data_prio_tree
(
MPI_COMM_WORLD
,
starpu_mpi_redux_data_prio_tree
(
MPI_COMM_WORLD
,
RTBLKADDR
(
A
,
CHAMELEON_Complex64_t
,
Am
,
An
),
RTBLKADDR
(
A
,
CHAMELEON_Complex64_t
,
Am
,
An
),
options
->
priority
+
1
,
options
->
priority
+
1
,
2
/* Arbre binaire */
);
2
/* Binary tree */
);
#else
#else
(
void
)
options
;
(
void
)
options
;
(
void
)
A
;
(
void
)
A
;
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment