Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
solverstack
mini-examples
starpu_example_dgemm
Commits
d0a6fd5b
Commit
d0a6fd5b
authored
Dec 02, 2021
by
Antoine Jego
Browse files
option to hand out a built context
parent
bfc633de
Changes
3
Hide whitespace changes
Inline
Side-by-side
README.org
View file @
d0a6fd5b
...
...
@@ -7,3 +7,8 @@ This is a mini-example of distributed gemm for starpu mpi, both in Fortran and C
make -j
make install
#+end_src
* TODO
- add warmup run
fstarpu_example_dgemm.f90
View file @
d0a6fd5b
...
...
@@ -39,6 +39,8 @@ program fstarpu_example_dgemm
logical
::
super_prune
=
.false.
logical
::
prune_handles
=
.false.
logical
::
delay
=
.false.
logical
::
provide_context
=
.false.
logical
::
warmup
=
.true.
integer
(
c_int
)
::
comm_size
,
comm_rank
integer
(
c_int
),
target
::
comm_world
...
...
@@ -58,6 +60,11 @@ program fstarpu_example_dgemm
integer
::
te
,
ts
,
tr
real
::
tf
,
gflops
integer
(
c_int
),
dimension
(:),
allocatable
::
procs
integer
(
c_int
)
::
ctx
integer
(
c_int
),
target
::
arg_ctx
character
(
kind
=
c_char
,
len
=
4
),
target
::
ctx_policy
=
C_CHAR_"lws"
//
C_NULL_CHAR
write
(
*
,
*
)
"initializing starpu ..."
ret
=
fstarpu_init
(
C_NULL_PTR
)
if
(
ret
==
-19
)
then
...
...
@@ -117,6 +124,11 @@ program fstarpu_example_dgemm
prune_handles
=
.true.
case
(
'-d'
)
delay
=
.true.
case
(
'-c'
)
provide_context
=
.true.
case
(
'-now'
)
warmup
=
.false.
! keep -e as an empty argument for debug purpose
end
select
end
do
...
...
@@ -133,12 +145,14 @@ program fstarpu_example_dgemm
write
(
*
,
'("mbxnbxkb = ",i5,"x",i5,"x",i5)'
)
mb
,
nb
,
kb
write
(
*
,
'("B = ",i5)'
)
bs
write
(
*
,
'("PxQ = ",i3,"x",i3)'
)
p
,
q
if
(
trace
)
write
(
*
,
*
)
"(T)racing enabled"
if
(
lflush
)
write
(
*
,
*
)
"(F)lushing enabled"
if
(
super_prune
)
write
(
*
,
*
)
"(S)uper-pruning enabled"
if
(
prune
)
write
(
*
,
*
)
"(P)runing enabled"
if
(
prune_handles
)
write
(
*
,
*
)
"(H)andles pruning enabled"
if
(
delay
)
write
(
*
,
*
)
"(D)elayed handle registration enabled"
if
(
trace
)
write
(
*
,
*
)
"(T)racing enabled"
if
(
lflush
)
write
(
*
,
*
)
"(F)lushing enabled"
if
(
super_prune
)
write
(
*
,
*
)
"(S)uper-pruning enabled"
if
(
prune
)
write
(
*
,
*
)
"(P)runing enabled"
if
(
prune_handles
)
write
(
*
,
*
)
"(H)andles pruning enabled"
if
(
delay
)
write
(
*
,
*
)
"(D)elayed handle registration enabled"
if
(
provide_context
)
write
(
*
,
*
)
"(C)ontext provided at submission"
if
(
.not.
warmup
)
write
(
*
,
*
)
"(W)armup disabled"
write
(
*
,
'("========================================")'
)
end
if
ret
=
fstarpu_mpi_barrier
(
comm_world
)
...
...
@@ -148,6 +162,14 @@ program fstarpu_example_dgemm
alpha
=
0.42
beta
=
3.14
if
(
provide_context
)
then
allocate
(
procs
(
ncpu
))
err
=
fstarpu_worker_get_ids_by_type
(
FSTARPU_CPU_WORKER
,
procs
,
ncpu
)
ctx
=
fstarpu_sched_ctx_create
(
procs
,
ncpu
,
C_CHAR_"stdalone"
//
C_NULL_CHAR
,&
(/
FSTARPU_SCHED_CTX_POLICY_NAME
,
c_loc
(
ctx_policy
),
c_null_ptr
/)
)
end
if
if
(
warmup
)
t
=
t
+
1
do
trial
=
1
,
t
! allocate matrices
call
initialize_matrix
(
A
,
mb
,
kb
,
"A"
,
.true.
,
.false.
)
...
...
@@ -186,13 +208,25 @@ program fstarpu_example_dgemm
call
block_register
(
B
,
l
,
j
)
call
block_register
(
C
,
i
,
j
)
end
if
call
fstarpu_mpi_task_insert
((/
c_loc
(
comm_world
),
cl_mm
,
&
if
(
provide_context
)
then
arg_ctx
=
ctx
call
fstarpu_mpi_task_insert
((/
c_loc
(
comm_world
),
cl_mm
,
&
FSTARPU_VALUE
,
c_loc
(
alpha
),
FSTARPU_SZ_REAL8
,
&
FSTARPU_VALUE
,
c_loc
(
zbeta
),
FSTARPU_SZ_REAL8
,
&
FSTARPU_R
,
A
%
blocks
(
i
,
l
)
%
h
,
&
FSTARPU_R
,
B
%
blocks
(
l
,
j
)
%
h
,
&
FSTARPU_RW
,
C
%
blocks
(
i
,
j
)
%
h
,
&
FSTARPU_SCHED_CTX
,
c_loc
(
arg_ctx
),
&
c_null_ptr
/))
else
call
fstarpu_mpi_task_insert
((/
c_loc
(
comm_world
),
cl_mm
,
&
FSTARPU_VALUE
,
c_loc
(
alpha
),
FSTARPU_SZ_REAL8
,
&
FSTARPU_VALUE
,
c_loc
(
zbeta
),
FSTARPU_SZ_REAL8
,
&
FSTARPU_R
,
A
%
blocks
(
i
,
l
)
%
h
,
&
FSTARPU_R
,
B
%
blocks
(
l
,
j
)
%
h
,
&
FSTARPU_RW
,
C
%
blocks
(
i
,
j
)
%
h
,
&
c_null_ptr
/))
end
if
else
!could write something
end
if
...
...
@@ -211,7 +245,8 @@ program fstarpu_example_dgemm
call
system_clock
(
te
,
tr
)
tf
=
max
(
real
(
te
-
ts
)/
real
(
tr
),
1e-20
)
gflops
=
2.0
*
m
*
n
*
k
/(
tf
*
10
**
9
)
if
(
comm_rank
.eq.
0
)
write
(
*
,
'("RANK ",i3," -> took ",e15.8," s | ", e15.8," Gflop/s")'
)
&
if
(
comm_rank
.eq.
0.
and
.
(
.not.
warmup
.or.
trial
.gt.
1
))
&
write
(
*
,
'("RANK ",i3," -> took ",e15.8," s | ", e15.8," Gflop/s")'
)
&
comm_rank
,
tf
,
gflops
! unregister matrices
...
...
@@ -220,6 +255,10 @@ program fstarpu_example_dgemm
call
unregister_matrix
(
C
,
mb
,
nb
)
end
do
if
(
provide_context
)
then
call
fstarpu_sched_ctx_delete
(
ctx
)
deallocate
(
procs
)
endif
call
fstarpu_codelet_free
(
cl_mm
)
call
fstarpu_codelet_free
(
cl_fill
)
...
...
starpu_example_dgemm.c
View file @
d0a6fd5b
...
...
@@ -273,7 +273,7 @@ static void cpu_gemm(void *handles[], void *args)
// if (verbose) printf("gemm_task\n");
// printf("DATA %d | ld A %d B %d C %d | alpha %f beta %f \n", datatype, ld_A, ld_B, ld_C, clargs->alpha, clargs->beta);
double
start
=
starpu_timing_now
();
cblas_dgemm
(
Cblas
Col
Major
,
CblasNoTrans
,
CblasNoTrans
,
// 2
cblas_dgemm
(
Cblas
Row
Major
,
CblasNoTrans
,
CblasNoTrans
,
// 2
n_row_C
,
n_col_C
,
n_col_A
,
clargs
->
alpha
,
block_A
,
ld_A
,
block_B
,
// 9
ld_B
,
clargs
->
beta
,
block_C
,
ld_C
);
// 13
double
stop
=
starpu_timing_now
();
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment