Mentions légales du service
Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
Chameleon
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Operate
Environments
Terraform modules
Monitor
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
solverstack
Chameleon
Commits
9855dcaa
Commit
9855dcaa
authored
4 months ago
by
LISITO Alycia
Browse files
Options
Downloads
Patches
Plain Diff
zgetrf: zperm_allreduce with starpu tasks version
parent
7fb2b629
No related branches found
No related tags found
1 merge request
!496
zgetrf: Allreduce perm
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
runtime/starpu/codelets/codelet_zperm_allreduce.c
+134
-0
134 additions, 0 deletions
runtime/starpu/codelets/codelet_zperm_allreduce.c
with
134 additions
and
0 deletions
runtime/starpu/codelets/codelet_zperm_allreduce.c
+
134
−
0
View file @
9855dcaa
...
@@ -20,6 +20,110 @@
...
@@ -20,6 +20,110 @@
#include
<coreblas/cblas_wrapper.h>
#include
<coreblas/cblas_wrapper.h>
#if defined(CHAMELEON_USE_MPI)
#if defined(CHAMELEON_USE_MPI)
struct
cl_redux_args_t
{
int
tempmm
;
int
n
;
int
p
;
int
q
;
int
p_first
;
int
me
;
int
shift
;
int
np_inv
;
};
static
void
cl_zperm_allreduce_cpu_func
(
void
*
descr
[],
void
*
cl_arg
)
{
struct
cl_redux_args_t
*
clargs
=
(
struct
cl_redux_args_t
*
)
cl_arg
;
const
CHAM_tile_t
*
tileUinout
=
cti_interface_get
(
descr
[
0
]
);
const
CHAM_tile_t
*
tileUin
=
cti_interface_get
(
descr
[
1
]
);
const
int
*
perm
=
(
int
*
)
STARPU_VECTOR_GET_PTR
(
descr
[
2
]
);
CHAMELEON_Complex64_t
*
Uinout
=
CHAM_tile_get_ptr
(
tileUinout
);
const
CHAMELEON_Complex64_t
*
Uin
=
CHAM_tile_get_ptr
(
tileUin
);
int
tempmm
=
clargs
->
tempmm
;
int
n
=
clargs
->
n
;
int
p
=
clargs
->
p
;
int
q
=
clargs
->
q
;
int
p_first
=
clargs
->
p_first
/
q
;
int
shift
=
clargs
->
shift
;
int
np
=
clargs
->
np_inv
;
int
me
=
(
p
<=
np
)
?
clargs
->
me
/
q
:
(
(
clargs
->
me
/
q
)
-
p_first
+
p
)
%
p
;
int
nb
=
tileUinout
->
n
;
int
mb
=
tileUinout
->
m
;
int
first
=
me
-
2
*
shift
+
1
;
int
last
=
me
-
shift
;
int
i
,
m
,
ownerp
;
for
(
i
=
0
;
i
<
tempmm
;
i
++
)
{
m
=
perm
[
i
]
/
mb
;
ownerp
=
(
p
<=
np
)
?
(
(
m
%
p
)
*
q
+
(
n
%
q
)
)
/
q
:
(
(
(
m
%
p
)
*
q
+
(
n
%
q
)
)
/
q
-
p_first
+
p
)
%
p
;
if
(
(
(
first
<=
ownerp
)
&&
(
ownerp
<=
last
)
)
||
(
(
first
+
np
<=
ownerp
)
&&
(
ownerp
<=
last
+
np
)
)
)
{
cblas_zcopy
(
nb
,
Uin
+
i
,
tileUin
->
ld
,
Uinout
+
i
,
tileUinout
->
ld
);
}
}
}
CODELETS_CPU
(
zperm_allreduce
,
cl_zperm_allreduce_cpu_func
)
static
void
INSERT_TASK_zperm_allreduce_send
(
const
RUNTIME_option_t
*
options
,
CHAM_desc_t
*
U
,
int
me
,
int
dst
,
int
n
)
{
rt_starpu_insert_task
(
NULL
,
STARPU_EXECUTE_ON_NODE
,
dst
,
STARPU_R
,
RTBLKADDR
(
U
,
CHAMELEON_Complex64_t
,
me
,
n
),
STARPU_PRIORITY
,
options
->
priority
,
0
);
}
static
void
INSERT_TASK_zperm_allreduce_recv
(
const
RUNTIME_option_t
*
options
,
CHAM_desc_t
*
U
,
CHAM_ipiv_t
*
ipiv
,
int
ipivk
,
int
me
,
int
src
,
int
n
,
int
tempmm
,
int
p
,
int
q
,
int
shift
,
int
np
,
int
p_first
)
{
struct
cl_redux_args_t
*
clargs
;
clargs
=
malloc
(
sizeof
(
struct
cl_redux_args_t
)
);
clargs
->
tempmm
=
tempmm
;
clargs
->
n
=
n
;
clargs
->
p
=
p
;
clargs
->
q
=
q
;
clargs
->
p_first
=
p_first
;
clargs
->
me
=
me
;
clargs
->
shift
=
shift
;
clargs
->
np_inv
=
np
;
rt_starpu_insert_task
(
&
cl_zperm_allreduce
,
STARPU_CL_ARGS
,
clargs
,
sizeof
(
struct
cl_redux_args_t
),
STARPU_RW
,
RTBLKADDR
(
U
,
CHAMELEON_Complex64_t
,
me
,
n
),
STARPU_R
,
RTBLKADDR
(
U
,
CHAMELEON_Complex64_t
,
src
,
n
),
STARPU_R
,
RUNTIME_perm_getaddr
(
ipiv
,
ipivk
),
STARPU_EXECUTE_ON_NODE
,
me
,
STARPU_EXECUTE_ON_WORKER
,
options
->
workerid
,
STARPU_PRIORITY
,
options
->
priority
,
0
);
starpu_mpi_cache_flush
(
options
->
sequence
->
comm
,
RTBLKADDR
(
U
,
CHAMELEON_Complex64_t
,
src
,
n
)
);
}
void
void
INSERT_TASK_zperm_allreduce
(
const
RUNTIME_option_t
*
options
,
INSERT_TASK_zperm_allreduce
(
const
RUNTIME_option_t
*
options
,
const
CHAM_desc_t
*
A
,
const
CHAM_desc_t
*
A
,
...
@@ -33,6 +137,36 @@ INSERT_TASK_zperm_allreduce( const RUNTIME_option_t *options,
...
@@ -33,6 +137,36 @@ INSERT_TASK_zperm_allreduce( const RUNTIME_option_t *options,
void
*
ws
)
void
*
ws
)
{
{
struct
chameleon_pzgetrf_s
*
tmp
=
(
struct
chameleon_pzgetrf_s
*
)
ws
;
struct
chameleon_pzgetrf_s
*
tmp
=
(
struct
chameleon_pzgetrf_s
*
)
ws
;
int
*
proc_involved
=
tmp
->
proc_involved
;
int
np_involved
=
chameleon_min
(
A
->
p
,
A
->
mt
-
k
);
int
np_iter
=
np_involved
;
int
p_recv
,
p_send
,
me
,
p_first
;
int
shift
=
1
;
if
(
np_involved
==
1
)
{
assert
(
proc_involved
[
0
]
==
A
->
myrank
);
}
else
{
p_first
=
proc_involved
[
0
];
for
(
me
=
0
;
me
<
np_involved
;
me
++
)
{
if
(
proc_involved
[
me
]
==
A
->
myrank
)
{
break
;
}
}
assert
(
me
<
np_involved
);
while
(
np_iter
>
1
)
{
p_send
=
proc_involved
[
(
me
+
shift
)
%
np_involved
];
p_recv
=
proc_involved
[
(
me
-
shift
+
np_involved
)
%
np_involved
];
INSERT_TASK_zperm_allreduce_send
(
options
,
U
,
A
->
myrank
,
p_send
,
n
);
INSERT_TASK_zperm_allreduce_recv
(
options
,
U
,
ipiv
,
ipivk
,
A
->
myrank
,
p_recv
,
n
,
k
==
(
A
->
mt
-
1
)
?
A
->
m
-
k
*
A
->
mb
:
A
->
mb
,
A
->
p
,
A
->
q
,
shift
,
np_involved
,
p_first
);
shift
=
shift
<<
1
;
np_iter
=
chameleon_ceil
(
np_iter
,
2
);
}
}
}
}
void
void
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment