Mentions légales du service
Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
Chameleon
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Operate
Environments
Terraform modules
Monitor
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
solverstack
Chameleon
Commits
d1dc8c25
Commit
d1dc8c25
authored
7 months ago
by
Ana Hourcau
Committed by
Mathieu Faverge
6 months ago
Browse files
Options
Downloads
Patches
Plain Diff
Adapting gered and gerst codelets cuda parts
parent
7fc32871
No related branches found
No related tags found
1 merge request
!488
Mixed precision
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
runtime/starpu/codelets/codelet_zgered.c
+39
-42
39 additions, 42 deletions
runtime/starpu/codelets/codelet_zgered.c
runtime/starpu/codelets/codelet_zgerst.c
+23
-21
23 additions, 21 deletions
runtime/starpu/codelets/codelet_zgerst.c
with
62 additions
and
63 deletions
runtime/starpu/codelets/codelet_zgered.c
+
39
−
42
View file @
d1dc8c25
...
...
@@ -71,8 +71,7 @@ void INSERT_TASK_zgered( const RUNTIME_option_t *options,
#if defined(CHAMELEON_USE_MPI)
/* Backup the MPI tag */
if
(
A
->
myrank
==
tileA
->
rank
)
{
if
(
A
->
myrank
==
tileA
->
rank
)
{
tag
=
starpu_mpi_data_get_tag
(
*
handleAin
);
}
#endif
/* defined(CHAMELEON_USE_MPI) */
...
...
@@ -89,39 +88,41 @@ void INSERT_TASK_zgered( const RUNTIME_option_t *options,
#if defined(CHAMELEON_DEBUG_GERED)
fprintf
(
stderr
,
"[%2d] Convert the tile ( %d, %d ) to half precision
\n
"
,
A
->
myrank
,
Am
,
An
);
A
->
myrank
,
Am
,
An
);
#endif
starpu_cham_tile_register
(
&
handleAout
,
-
1
,
tileA
,
ChamComplexHalf
);
if
(
A
->
myrank
==
tileA
->
rank
)
{
starpu_cham_tile_register
(
&
handleAout
,
-
1
,
tileA
,
ChamComplexHalf
);
rt_shm_starpu_insert_task
(
&
cl_dlag2h
,
STARPU_VALUE
,
&
m
,
sizeof
(
int
),
STARPU_VALUE
,
&
n
,
sizeof
(
int
),
STARPU_R
,
*
handleAin
,
STARPU_W
,
handleAout
,
STARPU_PRIORITY
,
options
->
priority
,
STARPU_EXECUTE_ON_WORKER
,
options
->
workerid
,
rt_shm_starpu_insert_task
(
&
cl_dlag2h
,
STARPU_VALUE
,
&
m
,
sizeof
(
int
),
STARPU_VALUE
,
&
n
,
sizeof
(
int
),
STARPU_R
,
*
handleAin
,
STARPU_W
,
handleAout
,
STARPU_PRIORITY
,
options
->
priority
,
STARPU_EXECUTE_ON_WORKER
,
options
->
workerid
,
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME
,
"dlag2h"
,
STARPU_NAME
,
"dlag2h"
,
#endif
0
);
0
);
starpu_data_unregister_no_coherency
(
*
handleAin
);
*
handleAin
=
handleAout
;
tileA
->
flttype
=
ChamComplexHalf
;
starpu_mpi_data_register
(
handleAout
,
tag
,
tileA
->
rank
);
}
else
{
tileA
->
flttype
=
ChamComplexHalf
;
if
(
*
handleAin
!=
NULL
)
starpu_data_unregister_no_coherency
(
*
handleAin
);
*
handleAin
=
handleAout
;
tileA
->
flttype
=
ChamComplexHalf
;
starpu_mpi_data_register
(
handleAout
,
tag
,
tileA
->
rank
);
}
else
{
starpu_data_unregister_no_coherency
(
*
handleAin
);
*
handleAin
=
NULL
;
tileA
->
flttype
=
ChamComplexHalf
;
if
(
*
handleAin
!=
NULL
)
{
starpu_data_unregister_no_coherency
(
*
handleAin
);
*
handleAin
=
NULL
;
}
}
return
;
}
return
;
}
#endif
#endif
...
...
@@ -129,11 +130,7 @@ void INSERT_TASK_zgered( const RUNTIME_option_t *options,
/*
* Check for single precision
*/
#if !defined(CHAMELEON_SIMULATION)
u_low
=
LAPACKE_slamch_work
(
'e'
);
#else
u_low
=
1e-8
;
#endif
u_low
=
CHAMELEON_slamch
();
if
(
lnorm
<
(
threshold
/
u_low
)
)
{
#if defined(CHAMELEON_DEBUG_GERED)
...
...
@@ -141,34 +138,34 @@ void INSERT_TASK_zgered( const RUNTIME_option_t *options,
"[%2d] Convert the tile ( %d, %d ) to single precision
\n
"
,
A
->
myrank
,
Am
,
An
);
#endif
if
(
A
->
myrank
==
tileA
->
rank
)
if
(
A
->
myrank
==
tileA
->
rank
)
{
starpu_cham_tile_register
(
&
handleAout
,
-
1
,
tileA
,
ChamComplexFloat
);
rt_shm_starpu_insert_task
(
&
cl_zlag2c
,
STARPU_VALUE
,
&
m
,
sizeof
(
int
),
STARPU_VALUE
,
&
n
,
sizeof
(
int
),
STARPU_R
,
*
handleAin
,
STARPU_W
,
handleAout
,
STARPU_PRIORITY
,
options
->
priority
,
STARPU_VALUE
,
&
m
,
sizeof
(
int
),
STARPU_VALUE
,
&
n
,
sizeof
(
int
),
STARPU_R
,
*
handleAin
,
STARPU_W
,
handleAout
,
STARPU_PRIORITY
,
options
->
priority
,
STARPU_EXECUTE_ON_WORKER
,
options
->
workerid
,
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME
,
"zlag2c"
,
STARPU_NAME
,
"zlag2c"
,
#endif
0
);
starpu_data_unregister_no_coherency
(
*
handleAin
);
*
handleAin
=
handleAout
;
*
handleAin
=
handleAout
;
tileA
->
flttype
=
ChamComplexFloat
;
starpu_mpi_data_register
(
*
handleAin
,
tag
,
tileA
->
rank
);
}
else
{
tileA
->
flttype
=
ChamComplexFloat
;
if
(
*
handleAin
!=
NULL
)
if
(
*
handleAin
!=
NULL
)
{
starpu_data_unregister_no_coherency
(
*
handleAin
);
starpu_data_unregister_no_coherency
(
*
handleAin
);
*
handleAin
=
NULL
;
}
}
...
...
This diff is collapsed.
Click to expand it.
runtime/starpu/codelets/codelet_zgerst.c
+
23
−
21
View file @
d1dc8c25
...
...
@@ -39,7 +39,8 @@ void INSERT_TASK_zgerst( const RUNTIME_option_t *options,
handleAin
=
A
->
schedopt
;
handleAin
+=
((
int64_t
)
A
->
lmt
)
*
nn
+
mm
;
if
(
tileA
->
flttype
==
ChamComplexDouble
)
{
if
(
tileA
->
flttype
==
ChamComplexDouble
)
{
starpu_data_handle_t
*
copy
=
handleAin
;
/* Remove first copy */
...
...
@@ -59,12 +60,12 @@ void INSERT_TASK_zgerst( const RUNTIME_option_t *options,
return
;
}
if
(
A
->
myrank
!=
tileA
->
rank
)
if
(
A
->
myrank
!=
tileA
->
rank
)
{
tileA
->
flttype
=
ChamComplexDouble
;
if
(
*
handleAin
!=
NULL
)
if
(
*
handleAin
!=
NULL
)
{
starpu_data_unregister_no_coherency
(
*
handleAin
);
starpu_data_unregister_no_coherency
(
*
handleAin
);
*
handleAin
=
NULL
;
}
return
;
...
...
@@ -79,9 +80,9 @@ void INSERT_TASK_zgerst( const RUNTIME_option_t *options,
switch
(
tileA
->
flttype
)
{
#if defined(CHAMELEON_USE_CUDA) && (CUDA_VERSION >= 7500)
#if defined(PRECISION_d)
/*
* Restore from half precision
*/
/*
* Restore from half precision
*/
case
ChamComplexHalf
:
assert
(
options
->
withcuda
);
#if defined(CHAMELEON_DEBUG_GERED)
...
...
@@ -91,14 +92,14 @@ void INSERT_TASK_zgerst( const RUNTIME_option_t *options,
#endif
rt_shm_starpu_insert_task
(
&
cl_hlag2d
,
STARPU_VALUE
,
&
m
,
sizeof
(
int
),
STARPU_VALUE
,
&
n
,
sizeof
(
int
),
STARPU_R
,
*
handleAin
,
STARPU_W
,
handleAout
,
STARPU_PRIORITY
,
options
->
priority
,
STARPU_VALUE
,
&
m
,
sizeof
(
int
),
STARPU_VALUE
,
&
n
,
sizeof
(
int
),
STARPU_R
,
*
handleAin
,
STARPU_W
,
handleAout
,
STARPU_PRIORITY
,
options
->
priority
,
STARPU_EXECUTE_ON_WORKER
,
options
->
workerid
,
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME
,
"hlag2d"
,
STARPU_NAME
,
"hlag2d"
,
#endif
0
);
break
;
...
...
@@ -108,19 +109,20 @@ void INSERT_TASK_zgerst( const RUNTIME_option_t *options,
case
ChamComplexFloat
:
#if defined(CHAMELEON_DEBUG_GERED)
fprintf
(
stderr
,
"[%2d] Convert back the tile ( %d, %d ) from
half
precision
\n
"
,
"[%2d] Convert back the tile ( %d, %d ) from
single
precision
\n
"
,
A
->
myrank
,
Am
,
An
);
#endif
rt_shm_starpu_insert_task
(
&
cl_clag2z
,
STARPU_VALUE
,
&
m
,
sizeof
(
int
),
STARPU_VALUE
,
&
n
,
sizeof
(
int
),
STARPU_R
,
*
handleAin
,
STARPU_W
,
handleAout
,
STARPU_PRIORITY
,
options
->
priority
,
STARPU_VALUE
,
&
m
,
sizeof
(
int
),
STARPU_VALUE
,
&
n
,
sizeof
(
int
),
STARPU_R
,
*
handleAin
,
STARPU_W
,
handleAout
,
STARPU_PRIORITY
,
options
->
priority
,
STARPU_EXECUTE_ON_WORKER
,
options
->
workerid
,
#if defined(CHAMELEON_CODELETS_HAVE_NAME)
STARPU_NAME
,
"clag2z"
,
STARPU_NAME
,
"clag2z"
,
#endif
0
);
break
;
...
...
@@ -130,7 +132,7 @@ void INSERT_TASK_zgerst( const RUNTIME_option_t *options,
}
starpu_data_unregister_no_coherency
(
*
handleAin
);
*
handleAin
=
handleAout
;
*
handleAin
=
handleAout
;
tileA
->
flttype
=
ChamComplexDouble
;
starpu_mpi_data_register
(
handleAout
,
tag
,
tileA
->
rank
);
}
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment