Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
solverstack
mini-examples
starpu_example_dgemm
Commits
d5eba9f6
Commit
d5eba9f6
authored
Dec 16, 2021
by
Antoine Jego
Browse files
cl_args for fortran
parent
aa0df4c7
Changes
2
Hide whitespace changes
Inline
Side-by-side
fstarpu_codelets.f90
View file @
d5eba9f6
...
...
@@ -14,6 +14,14 @@
! See the GNU Lesser General Public License in COPYING.LGPL for more details.
!
module
fstarpu_codelets
use
iso_c_binding
! C interfacing module
implicit
none
type
cl_gemm_args
real
(
kind
=
c_double
)
::
alpha
real
(
kind
=
c_double
)
::
beta
end
type
contains
recursive
subroutine
cl_cpu_gemm
(
buffers
,
cl_args
)
bind
(
C
)
...
...
@@ -21,7 +29,7 @@ recursive subroutine cl_cpu_gemm (buffers, cl_args) bind(C)
use
fstarpu_mod
! StarPU interfacing module
implicit
none
type
(
c_ptr
),
value
,
intent
(
in
)
::
buffers
,
cl_args
! cl_args is unused
type
(
c_ptr
),
value
,
intent
(
in
)
::
buffers
,
cl_args
real
(
kind
=
c_double
),
target
::
alpha
,
beta
real
(
kind
=
c_double
),
pointer
::
A
(:,:),
B
(:,:),
C
(:,:)
integer
::
ld_A
,
nx_A
,
ny_A
...
...
@@ -59,6 +67,53 @@ recursive subroutine cl_cpu_gemm (buffers, cl_args) bind(C)
end
subroutine
cl_cpu_gemm
recursive
subroutine
cl_cpu_gemm_args
(
buffers
,
cl_args
)
bind
(
C
)
use
iso_c_binding
! C interfacing module
use
fstarpu_mod
! StarPU interfacing module
implicit
none
type
(
c_ptr
),
value
,
intent
(
in
)
::
buffers
type
(
c_ptr
),
value
::
cl_args
type
(
cl_gemm_args
),
pointer
::
fcl_args
real
(
kind
=
c_double
),
target
::
alpha
,
beta
real
(
kind
=
c_double
),
pointer
::
A
(:,:),
B
(:,:),
C
(:,:)
integer
::
ld_A
,
nx_A
,
ny_A
integer
::
ld_B
,
nx_B
,
ny_B
integer
::
ld_C
,
nx_C
,
ny_C
integer
::
i
,
j
,
k
integer
::
te
,
ts
,
tr
real
::
tf
,
gflops
call
c_f_pointer
(
cl_args
,
fcl_args
)
alpha
=
fcl_args
%
alpha
beta
=
fcl_args
%
beta
ld_A
=
fstarpu_matrix_get_ld
(
buffers
,
0
)
ld_B
=
fstarpu_matrix_get_ld
(
buffers
,
1
)
ld_C
=
fstarpu_matrix_get_ld
(
buffers
,
2
)
nx_A
=
fstarpu_matrix_get_nx
(
buffers
,
0
)
nx_B
=
fstarpu_matrix_get_nx
(
buffers
,
1
)
nx_C
=
fstarpu_matrix_get_nx
(
buffers
,
2
)
ny_A
=
fstarpu_matrix_get_ny
(
buffers
,
0
)
ny_B
=
fstarpu_matrix_get_ny
(
buffers
,
1
)
ny_C
=
fstarpu_matrix_get_ny
(
buffers
,
2
)
call
c_f_pointer
(
fstarpu_matrix_get_ptr
(
buffers
,
0
),
A
,
shape
=
[
ld_A
,
ny_A
])
call
c_f_pointer
(
fstarpu_matrix_get_ptr
(
buffers
,
1
),
B
,
shape
=
[
ld_B
,
ny_B
])
call
c_f_pointer
(
fstarpu_matrix_get_ptr
(
buffers
,
2
),
C
,
shape
=
[
ld_C
,
ny_C
])
call
system_clock
(
ts
)
call
dgemm
(
'n'
,
'n'
,
nx_C
,
ny_C
,
nx_B
,
alpha
,
A
(
1
,
1
),
ld_A
,
B
(
1
,
1
),
ld_B
,
&
beta
,
C
(
1
,
1
),
ld_C
)
call
system_clock
(
te
,
tr
)
!tf = max(real(te-ts)/real(tr),1e-20)
!gflops = 2.0*nx_C*ny_C*nx_B/(tf*10**9)
!write(*,*) "gemm_task", gflops, "Gflop/s", alpha, beta
return
end
subroutine
cl_cpu_gemm_args
recursive
subroutine
cl_cpu_fill
(
buffers
,
cl_args
)
bind
(
C
)
use
iso_c_binding
! C interfacing module
use
fstarpu_mod
! StarPU interfacing module
...
...
fstarpu_example_dgemm.f90
View file @
d5eba9f6
...
...
@@ -41,6 +41,7 @@ program fstarpu_example_dgemm
logical
::
delay
=
.false.
logical
::
provide_context
=
.false.
logical
::
warmup
=
.true.
logical
::
codelet_args
=
.false.
integer
(
c_int
)
::
comm_size
,
comm_rank
integer
(
c_int
),
target
::
comm_world
...
...
@@ -53,7 +54,9 @@ program fstarpu_example_dgemm
type
(
dsmat_type
),
target
::
A
,
B
,
C
logical
::
A_local
,
B_local
,
C_local
real
(
kind
=
c_double
),
target
::
alpha
,
beta
,
zbeta
type
(
c_ptr
)
::
cl_mm
,
cl_fill
type
(
c_ptr
)
::
cl_mm
,
cl_mm_args
,
cl_fill
type
(
cl_gemm_args
),
pointer
::
cl_args
integer
(
c_int
),
pointer
::
args_sz
integer
(
c_int
)
::
ncpu
integer
(
c_int
)
::
ret
integer
::
i
,
j
,
l
,
p
,
q
,
trial
,
t
...
...
@@ -128,6 +131,8 @@ program fstarpu_example_dgemm
provide_context
=
.true.
case
(
'-now'
)
warmup
=
.false.
case
(
'-a'
)
codelet_args
=
.true.
! keep -e as an empty argument for debug purpose
end
select
end
do
...
...
@@ -152,7 +157,8 @@ program fstarpu_example_dgemm
if
(
prune_handles
)
write
(
*
,
*
)
"(H)andles pruning enabled"
if
(
delay
)
write
(
*
,
*
)
"(D)elayed handle registration enabled"
if
(
provide_context
)
write
(
*
,
*
)
"(C)ontext provided at submission"
if
(
.not.
warmup
)
write
(
*
,
*
)
"(W)armup disabled"
if
(
.not.
warmup
)
write
(
*
,
*
)
"(W)armup disabled"
if
(
codelet_args
)
write
(
*
,
*
)
"(A)rguments NOT by values"
write
(
*
,
'("========================================")'
)
end
if
ret
=
fstarpu_mpi_barrier
(
comm_world
)
...
...
@@ -218,6 +224,21 @@ program fstarpu_example_dgemm
FSTARPU_RW
,
C
%
blocks
(
i
,
j
)
%
h
,
&
FSTARPU_SCHED_CTX
,
c_loc
(
arg_ctx
),
&
c_null_ptr
/))
else
if
(
codelet_args
)
then
if
(
C_local
)
then
allocate
(
cl_args
)
cl_args
%
alpha
=
alpha
cl_args
%
beta
=
zbeta
end
if
args_sz
=
storage_size
(
cl_args
)/
8
call
fstarpu_mpi_task_insert
((/
c_loc
(
comm_world
),
cl_mm_args
,
&
FSTARPU_CL_ARGS
,
c_loc
(
cl_args
),
c_loc
(
args_sz
),
&
FSTARPU_R
,
A
%
blocks
(
i
,
l
)
%
h
,
&
FSTARPU_R
,
B
%
blocks
(
l
,
j
)
%
h
,
&
FSTARPU_RW
,
C
%
blocks
(
i
,
j
)
%
h
,
&
c_null_ptr
/))
! We do not deallocate ... which is weird ... but the task does
! it for us upon destruction
else
call
fstarpu_mpi_task_insert
((/
c_loc
(
comm_world
),
cl_mm
,
&
FSTARPU_VALUE
,
c_loc
(
alpha
),
FSTARPU_SZ_REAL8
,
&
...
...
@@ -263,6 +284,7 @@ program fstarpu_example_dgemm
endif
call
fstarpu_codelet_free
(
cl_mm
)
call
fstarpu_codelet_free
(
cl_mm_args
)
call
fstarpu_codelet_free
(
cl_fill
)
call
fstarpu_shutdown
()
...
...
@@ -282,6 +304,12 @@ contains
call
fstarpu_codelet_add_buffer
(
cl_mm
,
FSTARPU_R
)
call
fstarpu_codelet_add_buffer
(
cl_mm
,
FSTARPU_R
)
call
fstarpu_codelet_add_buffer
(
cl_mm
,
FSTARPU_RW
)
cl_mm_args
=
fstarpu_codelet_allocate
()
call
fstarpu_codelet_set_name
(
cl_mm_args
,
c_char_"nf_gemm_cl_args"
//
c_null_char
)
call
fstarpu_codelet_add_cpu_func
(
cl_mm_args
,
C_FUNLOC
(
cl_cpu_gemm_args
))
call
fstarpu_codelet_add_buffer
(
cl_mm_args
,
FSTARPU_R
)
call
fstarpu_codelet_add_buffer
(
cl_mm_args
,
FSTARPU_R
)
call
fstarpu_codelet_add_buffer
(
cl_mm_args
,
FSTARPU_RW
)
cl_fill
=
fstarpu_codelet_allocate
()
call
fstarpu_codelet_set_name
(
cl_fill
,
c_char_"nf_fill_cl"
//
c_null_char
)
call
fstarpu_codelet_add_cpu_func
(
cl_fill
,
C_FUNLOC
(
cl_cpu_fill
))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment