Commit d5eba9f6 authored by Antoine Jego's avatar Antoine Jego
Browse files

cl_args for fortran

parent aa0df4c7
......@@ -14,6 +14,14 @@
! See the GNU Lesser General Public License in COPYING.LGPL for more details.
!
module fstarpu_codelets
use iso_c_binding ! C interfacing module
implicit none
type cl_gemm_args
real(kind=c_double) :: alpha
real(kind=c_double) :: beta
end type
contains
recursive subroutine cl_cpu_gemm (buffers, cl_args) bind(C)
......@@ -21,7 +29,7 @@ recursive subroutine cl_cpu_gemm (buffers, cl_args) bind(C)
use fstarpu_mod ! StarPU interfacing module
implicit none
type(c_ptr), value, intent(in) :: buffers, cl_args ! cl_args is unused
type(c_ptr), value, intent(in) :: buffers, cl_args
real(kind=c_double), target :: alpha, beta
real(kind=c_double),pointer :: A(:,:), B(:,:), C(:,:)
integer :: ld_A,nx_A,ny_A
......@@ -59,6 +67,53 @@ recursive subroutine cl_cpu_gemm (buffers, cl_args) bind(C)
end subroutine cl_cpu_gemm
recursive subroutine cl_cpu_gemm_args (buffers, cl_args) bind(C)
use iso_c_binding ! C interfacing module
use fstarpu_mod ! StarPU interfacing module
implicit none
type(c_ptr), value, intent(in) :: buffers
type(c_ptr), value :: cl_args
type(cl_gemm_args), pointer :: fcl_args
real(kind=c_double), target :: alpha, beta
real(kind=c_double), pointer :: A(:,:), B(:,:), C(:,:)
integer :: ld_A,nx_A,ny_A
integer :: ld_B,nx_B,ny_B
integer :: ld_C,nx_C,ny_C
integer :: i,j,k
integer :: te, ts, tr
real :: tf, gflops
call c_f_pointer(cl_args, fcl_args)
alpha = fcl_args%alpha
beta = fcl_args%beta
ld_A = fstarpu_matrix_get_ld(buffers, 0)
ld_B = fstarpu_matrix_get_ld(buffers, 1)
ld_C = fstarpu_matrix_get_ld(buffers, 2)
nx_A = fstarpu_matrix_get_nx(buffers, 0)
nx_B = fstarpu_matrix_get_nx(buffers, 1)
nx_C = fstarpu_matrix_get_nx(buffers, 2)
ny_A = fstarpu_matrix_get_ny(buffers, 0)
ny_B = fstarpu_matrix_get_ny(buffers, 1)
ny_C = fstarpu_matrix_get_ny(buffers, 2)
call c_f_pointer(fstarpu_matrix_get_ptr(buffers, 0), A, shape=[ld_A,ny_A])
call c_f_pointer(fstarpu_matrix_get_ptr(buffers, 1), B, shape=[ld_B,ny_B])
call c_f_pointer(fstarpu_matrix_get_ptr(buffers, 2), C, shape=[ld_C,ny_C])
call system_clock(ts)
call dgemm('n','n',nx_C,ny_C,nx_B, alpha, A(1,1), ld_A, B(1,1), ld_B, &
beta, C(1,1), ld_C)
call system_clock(te,tr)
!tf = max(real(te-ts)/real(tr),1e-20)
!gflops = 2.0*nx_C*ny_C*nx_B/(tf*10**9)
!write(*,*) "gemm_task", gflops, "Gflop/s", alpha, beta
return
end subroutine cl_cpu_gemm_args
recursive subroutine cl_cpu_fill (buffers, cl_args) bind(C)
use iso_c_binding ! C interfacing module
use fstarpu_mod ! StarPU interfacing module
......
......@@ -41,6 +41,7 @@ program fstarpu_example_dgemm
logical :: delay = .false.
logical :: provide_context = .false.
logical :: warmup = .true.
logical :: codelet_args = .false.
integer(c_int) :: comm_size, comm_rank
integer(c_int), target :: comm_world
......@@ -53,7 +54,9 @@ program fstarpu_example_dgemm
type(dsmat_type),target :: A, B, C
logical :: A_local, B_local, C_local
real(kind=c_double), target :: alpha, beta, zbeta
type(c_ptr) :: cl_mm, cl_fill
type(c_ptr) :: cl_mm, cl_mm_args, cl_fill
type(cl_gemm_args), pointer :: cl_args
integer(c_int), pointer :: args_sz
integer(c_int) :: ncpu
integer(c_int) :: ret
integer :: i, j, l, p , q, trial, t
......@@ -128,6 +131,8 @@ program fstarpu_example_dgemm
provide_context = .true.
case('-now')
warmup = .false.
case('-a')
codelet_args = .true.
! keep -e as an empty argument for debug purpose
end select
end do
......@@ -152,7 +157,8 @@ program fstarpu_example_dgemm
if (prune_handles) write(*,*) "(H)andles pruning enabled"
if (delay) write(*,*) "(D)elayed handle registration enabled"
if (provide_context) write(*,*) "(C)ontext provided at submission"
if (.not.warmup) write(*,*) "(W)armup disabled"
if (.not.warmup) write(*,*) "(W)armup disabled"
if (codelet_args) write(*,*) "(A)rguments NOT by values"
write(*,'("========================================")')
end if
ret = fstarpu_mpi_barrier(comm_world)
......@@ -218,6 +224,21 @@ program fstarpu_example_dgemm
FSTARPU_RW, C%blocks(i,j)%h, &
FSTARPU_SCHED_CTX, c_loc(arg_ctx), &
c_null_ptr /))
else if (codelet_args) then
if (C_local) then
allocate(cl_args)
cl_args%alpha = alpha
cl_args%beta = zbeta
end if
args_sz = storage_size(cl_args)/8
call fstarpu_mpi_task_insert((/ c_loc(comm_world), cl_mm_args, &
FSTARPU_CL_ARGS, c_loc(cl_args), c_loc(args_sz), &
FSTARPU_R, A%blocks(i,l)%h, &
FSTARPU_R, B%blocks(l,j)%h, &
FSTARPU_RW, C%blocks(i,j)%h, &
c_null_ptr /))
! We do not deallocate ... which is weird ... but the task does
! it for us upon destruction
else
call fstarpu_mpi_task_insert((/ c_loc(comm_world), cl_mm, &
FSTARPU_VALUE, c_loc(alpha), FSTARPU_SZ_REAL8, &
......@@ -263,6 +284,7 @@ program fstarpu_example_dgemm
endif
call fstarpu_codelet_free(cl_mm)
call fstarpu_codelet_free(cl_mm_args)
call fstarpu_codelet_free(cl_fill)
call fstarpu_shutdown()
......@@ -282,6 +304,12 @@ contains
call fstarpu_codelet_add_buffer(cl_mm, FSTARPU_R)
call fstarpu_codelet_add_buffer(cl_mm, FSTARPU_R)
call fstarpu_codelet_add_buffer(cl_mm, FSTARPU_RW)
cl_mm_args = fstarpu_codelet_allocate()
call fstarpu_codelet_set_name(cl_mm_args, c_char_"nf_gemm_cl_args"//c_null_char)
call fstarpu_codelet_add_cpu_func(cl_mm_args, C_FUNLOC(cl_cpu_gemm_args))
call fstarpu_codelet_add_buffer(cl_mm_args, FSTARPU_R)
call fstarpu_codelet_add_buffer(cl_mm_args, FSTARPU_R)
call fstarpu_codelet_add_buffer(cl_mm_args, FSTARPU_RW)
cl_fill = fstarpu_codelet_allocate()
call fstarpu_codelet_set_name(cl_fill, c_char_"nf_fill_cl"//c_null_char)
call fstarpu_codelet_add_cpu_func(cl_fill, C_FUNLOC(cl_cpu_fill))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment