Commit 06b351ad authored by Antoine Jego's avatar Antoine Jego
Browse files

added fortran : TODO interpret CL arguments

parent 29473b1a
cmake_minimum_required (VERSION 3.3)
project(starpu_example_dgemm)
project(starpu_example_dgemm C Fortran)
# Check that we do no try to configure/build inside the source directory
# ----------------------------------------------------------------------
......@@ -110,22 +110,28 @@ find_package(LAPACKE REQUIRED)
#add_library(Dense_matrices dsmat.c)
#add_library(Tile_interface optional_tile_interface.c)
add_executable( starpu_example_dgemm starpu_example_dgemm.c dsmat.c optional_tile_interface.c )
add_executable( fstarpu_example_dgemm fstarpu_example_dgemm.f90 fstarpu_codelets.f90 fstarpu_mod.f90 fstarpu_mpi_mod.f90 )
if( ENABLE_MPI )
# target_link_libraries( Dense_matrices PUBLIC MPI::MPI_C )
# target_link_libraries( Tile_interface PUBLIC MPI::MPI_C )
target_link_libraries( starpu_example_dgemm PUBLIC MPI::MPI_C )
target_compile_definitions( starpu_example_dgemm PUBLIC ENABLE_MPI )
target_link_libraries( fstarpu_example_dgemm PUBLIC MPI::MPI_C )
target_compile_definitions( fstarpu_example_dgemm PUBLIC ENABLE_MPI )
endif()
if(ENABLE_STARPU)
# target_link_libraries( Dense_matrices PRIVATE MORSE::STARPU )
# target_link_libraries( Tile_interface PRIVATE MORSE::STARPU )
target_link_libraries( starpu_example_dgemm PRIVATE MORSE::STARPU )
target_compile_definitions( starpu_example_dgemm PUBLIC ENABLE_STARPU )
target_link_libraries( fstarpu_example_dgemm PRIVATE MORSE::STARPU )
target_compile_definitions( fstarpu_example_dgemm PUBLIC ENABLE_STARPU )
check_function_exists(starpu_data_peek HAVE_STARPU_DATA_PEEK)
if (ENABLE_MPI)
check_function_exists(starpu_mpi_interface_datatype_node_register HAVE_STARPU_MPI_INTERFACE_DATATYPE_NODE_REGISTER)
endif()
endif()
target_link_libraries( starpu_example_dgemm PUBLIC MORSE::LAPACKE )
target_link_libraries( starpu_example_dgemm PUBLIC MORSE::CBLAS )
target_link_libraries( starpu_example_dgemm PUBLIC MORSE::M )
......@@ -133,6 +139,14 @@ target_include_directories( starpu_example_dgemm PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>/include
$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>/include
${CMAKE_CURRENT_SOURCE_DIR} )
target_link_libraries( fstarpu_example_dgemm PUBLIC MORSE::LAPACKE )
target_link_libraries( fstarpu_example_dgemm PUBLIC MORSE::CBLAS )
target_link_libraries( fstarpu_example_dgemm PUBLIC MORSE::M )
target_include_directories( fstarpu_example_dgemm PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>/include
$<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>/include
${CMAKE_CURRENT_SOURCE_DIR} )
# ... and install it
# -------------------
......@@ -143,3 +157,6 @@ install( FILES
include/lapacke.h
DESTINATION include )
install( TARGETS fstarpu_example_dgemm RUNTIME DESTINATION bin )
install( FILES
DESTINATION include )
! StarPU --- Runtime system for heterogeneous multicore architectures.
!
! Copyright (C) 2016-2021 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
!
! StarPU is free software; you can redistribute it and/or modify
! it under the terms of the GNU Lesser General Public License as published by
! the Free Software Foundation; either version 2.1 of the License, or (at
! your option) any later version.
!
! StarPU is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
!
! See the GNU Lesser General Public License in COPYING.LGPL for more details.
!
module fstarpu_codelets
contains
recursive subroutine cl_cpu_gemm (buffers, cl_args) bind(C)
use iso_c_binding ! C interfacing module
use fstarpu_mod ! StarPU interfacing module
implicit none
type(c_ptr), value, intent(in) :: buffers, cl_args ! cl_args is unused
real, target :: alpha, beta
real(kind=c_double),pointer :: A(:,:), B(:,:), C(:,:)
integer :: ld_A,nx_A,ny_A
integer :: ld_B,nx_B,ny_B
integer :: ld_C,nx_C,ny_C
integer :: i,j,k
call fstarpu_unpack_arg( cl_args, (/ c_loc(alpha), c_loc(beta) /))
ld_A = fstarpu_matrix_get_ld(buffers, 0)
ld_B = fstarpu_matrix_get_ld(buffers, 1)
ld_C = fstarpu_matrix_get_ld(buffers, 2)
nx_A = fstarpu_matrix_get_nx(buffers, 0)
nx_B = fstarpu_matrix_get_nx(buffers, 1)
nx_C = fstarpu_matrix_get_nx(buffers, 2)
ny_A = fstarpu_matrix_get_ny(buffers, 0)
ny_B = fstarpu_matrix_get_ny(buffers, 1)
ny_C = fstarpu_matrix_get_ny(buffers, 2)
call c_f_pointer(fstarpu_matrix_get_ptr(buffers, 0), A, shape=[ld_A,ny_A])
call c_f_pointer(fstarpu_matrix_get_ptr(buffers, 1), B, shape=[ld_B,ny_B])
call c_f_pointer(fstarpu_matrix_get_ptr(buffers, 2), C, shape=[ld_C,ny_C])
call dgemm('n','n',nx_C,ny_C,nx_B, alpha, A(1,1), ld_A, B(1,1), ld_B, &
beta, C(1,1), ld_C)
return
end subroutine cl_cpu_gemm
recursive subroutine cl_cpu_fill (buffers, cl_args) bind(C)
use iso_c_binding ! C interfacing module
use fstarpu_mod ! StarPU interfacing module
use fstarpu_mpi_mod
implicit none
type(c_ptr), value, intent(in) :: cl_args
type(c_ptr), value, intent(in) :: buffers
real(kind=c_double), pointer :: x(:,:)
integer :: m, n, ld
integer :: j
integer :: iseed(4) = (/1,1,1,1/)
integer :: comm_rank
comm_rank = fstarpu_mpi_world_rank()
m = fstarpu_matrix_get_nx(buffers, 0)
n = fstarpu_matrix_get_ny(buffers, 0)
ld = fstarpu_matrix_get_ld(buffers, 0)
! write(*,*) comm_rank,"] fill", m, n, ld
call c_f_pointer(fstarpu_matrix_get_ptr(buffers, 0), x, shape=(/ld,n/))
! copied from qrm_dsmat_fill_task a few lines up
do j=1,n
call dlarnv(2, iseed(1), m, x(1, j))
end do
! write(*,*) comm_rank,"]end fill task"
return
end subroutine cl_cpu_fill
end module fstarpu_codelets
! StarPU --- Runtime system for heterogeneous multicore architectures.
!
! Copyright (C) 2016-2021 Université de Bordeaux, CNRS (LaBRI UMR 5800), Inria
!
! StarPU is free software; you can redistribute it and/or modify
! it under the terms of the GNU Lesser General Public License as published by
! the Free Software Foundation; either version 2.1 of the License, or (at
! your option) any later version.
!
! StarPU is distributed in the hope that it will be useful, but
! WITHOUT ANY WARRANTY; without even the implied warranty of
! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
!
! See the GNU Lesser General Public License in COPYING.LGPL for more details.
!
program fstarpu_example_dgemm
use iso_c_binding ! C interfacing module
use fstarpu_mod ! StarPU interfacing module
use fstarpu_mpi_mod ! StarPU-MPI interfacing module
use fstarpu_codelets
type block_type
real(kind=c_double), allocatable :: c(:,:)
type(c_ptr) :: h = c_null_ptr
integer :: owner
end type block_type
type dsmat_type
integer :: m, n, b
type(block_type), allocatable :: blocks(:,:)
end type dsmat_type
logical, parameter :: verbose = .false.
logical :: trace = .false.
logical :: lflush = .false.
logical :: prune = .false.
logical :: prune_handles = .false.
integer(c_int) :: comm_size, comm_rank
integer(c_int), target :: comm_world
integer :: bs
integer(c_int) :: m, mb
integer(c_int) :: n, nb
integer(c_int) :: k, kb
character(len=20) :: str
type(dsmat_type),target :: A, B, C
real(kind=c_double), target :: alpha, beta, zbeta
type(c_ptr) :: cl_mm, cl_fill
integer(c_int) :: ncpu
integer(c_int) :: ret
integer :: i, j, l, p , q, trial, t
integer :: te, ts, tr
real :: tf, gflops
ret = fstarpu_init(C_NULL_PTR)
if (ret == -19) then
stop 77
else if (ret /= 0) then
stop 1
end if
ret = fstarpu_mpi_init(1)
if (ret /= 0) then
write(*,'("fstarpu_mpi_init status:",i4)') ret
stop 1
end if
! stop there if no CPU worker available
ncpu = fstarpu_cpu_worker_get_count()
if (ncpu == 0) then
call fstarpu_shutdown()
stop 77
end if
comm_world = fstarpu_mpi_world_comm()
comm_size = fstarpu_mpi_world_size()
comm_rank = fstarpu_mpi_world_rank()
if (comm_size < 2) then
call fstarpu_shutdown()
ret = fstarpu_mpi_shutdown()
stop 77
end if
call get_command_argument(1, value=str, length=i)
read(str(1:i),*) m
call get_command_argument(2, value=str, length=i)
read(str(1:i),*) n
call get_command_argument(3, value=str, length=i)
read(str(1:i),*) k
call get_command_argument(4, value=str, length=i)
read(str(1:i),*) bs
call get_command_argument(5, value=str, length=i)
read(str(1:i),*) p
call get_command_argument(6, value=str, length=i)
read(str(1:i),*) q
call get_command_argument(7, value=str, length=i)
read(str(1:i),*) t
if (command_argument_count() == 8) then
trace = .true.
end if
if (mod(m,bs).ne.0) stop 75
if (mod(n,bs).ne.0) stop 75
if (mod(k,bs).ne.0) stop 75
mb = m/bs
nb = n/bs
kb = k/bs
if (comm_rank.eq.0) then
write(*,'("========================================")')
write(*,'("mxnxk = ",i5,"x",i5,"x",i5)') m, n, k
write(*,'("mbxnbxkb = ",i5,"x",i5,"x",i5)') mb, nb, kb
write(*,'("B = ",i5)') bs
write(*,'("PxQ = ",i3,"x",i3)') p,q
if (trace) write(*,*) "(T)racing enabled"
if (lflush) write(*,*) "(F)lushing enabled"
if (prune) write(*,*) "(P)runing enabled"
if (prune_handles) write(*,*) "(H)andles pruning enabled"
write(*,'("========================================")')
end if
ret = fstarpu_mpi_barrier(comm_world)
! intialize codelets
call initialize_codelets()
alpha = 0.42
beta = 3.14
do trial=1,t
! allocate matrices
call initialize_matrix(a,mb,kb,"A",prune_handles.and..true.,.false.)
call initialize_matrix(b,kb,nb,"B",.false.,prune_handles.and..true.)
call initialize_matrix(c,mb,nb,"C",prune_handles.and..true.,prune_handles.and..true.)
ret = fstarpu_mpi_barrier(comm_world)
call fill_matrix(A, mb,kb,"A")
call fill_matrix(B, kb,nb,"B")
call fill_matrix(C, mb,nb,"C")
ret = fstarpu_mpi_wait_for_all(comm_world)
ret = fstarpu_mpi_barrier(comm_world)
call system_clock(ts)
! submit matrix multiplication
do i=1,mb
do j=1,nb
do l=1,kb
if (.not.prune.or.(A%blocks(i,l)%owner == comm_rank.or.&
B%blocks(l,j)%owner == comm_rank.or.&
C%blocks(i,j)%owner == comm_rank)) then
! if (comm_rank.eq.0) write(*,*) "GEMM", b_col,b_row,b_aisle
if (l.eq.1) then; zbeta = beta; else; zbeta = 1.0d0; end if
call fstarpu_mpi_task_insert((/ c_loc(comm_world), cl_mm, &
FSTARPU_VALUE, c_loc(alpha), FSTARPU_SZ_REAL8, &
FSTARPU_VALUE, c_loc(zbeta), FSTARPU_SZ_REAL8, &
FSTARPU_R, A%blocks(i,l)%h, &
FSTARPU_R, B%blocks(l,j)%h, &
FSTARPU_RW, C%blocks(i,j)%h, &
c_null_ptr /))
end if
end do
end do
if (lflush) then
do l=1,kb
if (c_associated(A%blocks(i,l)%h)) &
call fstarpu_mpi_cache_flush(comm_world,A%blocks(i,l)%h)
end do
end if
end do
ret = fstarpu_mpi_wait_for_all(comm_world)
ret = fstarpu_mpi_barrier(comm_world)
call system_clock(te,tr)
tf = max(real(te-ts)/real(tr),1e-20)
gflops = 2.0*m*n*k/(tf*10**9)
if (comm_rank.eq.0) write(*,'("RANK ",i3," -> took ",e15.8," s | ", e15.8,"Gflop/s")') &
comm_rank, tf, gflops
! unregister matrices
call unregister_matrix(A,mb,kb)
call unregister_matrix(B,kb,nb)
call unregister_matrix(C,mb,nb)
end do
call fstarpu_codelet_free(cl_mm)
call fstarpu_codelet_free(cl_fill)
call fstarpu_shutdown()
ret = fstarpu_mpi_shutdown()
if (ret /= 0) then
write(*,'("fstarpu_mpi_shutdown status:",i4)') ret
stop 1
end if
contains
subroutine initialize_codelets()
implicit none
cl_mm = fstarpu_codelet_allocate()
call fstarpu_codelet_set_name(cl_mm, c_char_"nf_gemm_cl"//c_null_char)
call fstarpu_codelet_add_cpu_func(cl_mm, C_FUNLOC(cl_cpu_gemm))
call fstarpu_codelet_add_buffer(cl_mm, FSTARPU_R)
call fstarpu_codelet_add_buffer(cl_mm, FSTARPU_R)
call fstarpu_codelet_add_buffer(cl_mm, FSTARPU_RW)
cl_fill = fstarpu_codelet_allocate()
call fstarpu_codelet_set_name(cl_fill, c_char_"nf_fill_cl"//c_null_char)
call fstarpu_codelet_add_cpu_func(cl_fill, C_FUNLOC(cl_cpu_fill))
call fstarpu_codelet_add_buffer(cl_fill, FSTARPU_W)
end subroutine initialize_codelets
subroutine initialize_matrix(X,mb,nb,cname,prune_row,prune_col)
implicit none
type(dsmat_type), target :: x
integer :: mb, nb
character :: cname
integer :: i, j
type(block_type), pointer :: xij
integer(c_int64_t), save :: tag = 1
logical, optional :: prune_row, prune_col
logical :: lrow, lcol
integer :: comm_row, comm_col
if (present(prune_row)) then; lrow = prune_row; else; lrow = .false.; endif;
if (present(prune_col)) then; lcol = prune_col; else; lcol = .false.; endif;
x%m = mb*bs
x%n = nb*bs
x%b = bs
allocate(x%blocks(mb,nb))
comm_col = mod(comm_rank, q)
comm_row = (comm_rank - comm_col)/q
do i=1,mb
do j=1,nb
xij => x%blocks(i,j)
xij%owner = mod(i-1,p)*q + mod(j-1,q)
if (comm_rank.eq.xij%owner) then
! write(*,*) comm_rank,"] I own ",cname,"_",i,j,"so I register it with tag",tag
allocate(xij%c(bs,bs))
call fstarpu_matrix_data_register( xij%h, 0, c_loc( xij%c(1,1) ), &
bs, bs, bs, c_sizeof(xij%c(1,1)) )
call fstarpu_mpi_data_register(xij%h, tag, xij%owner)
else if ((.not.prune_row.or.comm_row.eq.mod(i,p)).or.&
(.not.prune_col.or.comm_col.eq.mod(j,q)) ) then
! write(*,*) comm_rank,"] ",xij%owner," owns ",cname,"_",i,j,"so it registers it with tag",tag
call fstarpu_matrix_data_register( xij%h, -1, c_null_ptr, &
bs, bs, bs, c_sizeof(alpha) )
call fstarpu_mpi_data_register(xij%h, tag, xij%owner)
end if
tag = tag + 1
end do
end do
end subroutine initialize_matrix
subroutine fill_matrix(x,mb,nb,cname)
implicit none
type(dsmat_type), target :: x
integer :: mb, nb
character :: cname
integer :: i, j
type(block_type), pointer :: xij
do i=1,mb
do j=1,nb
xij => x%blocks(i,j)
if (comm_rank.eq.xij%owner) then
! write(*,*) comm_rank,"] I own ",cname,"_",i,j,"so I fill it"
call fstarpu_mpi_task_insert((/ c_loc(comm_world), cl_fill, &
FSTARPU_W, xij%h, &
c_null_ptr /))
else
!write(*,*) comm_rank,"] ",xij%owner,"owns ",cname,"_",i,j,"so it fills it"
end if
end do
end do
end subroutine fill_matrix
subroutine unregister_matrix(x,mb,nb)
implicit none
integer :: mb, nb
type(block_type), pointer :: xij
type(dsmat_type), target :: x
integer :: i, j
do i=1,mb
do j=1,nb
xij => x%blocks(i,j)
if (comm_rank.eq.xij%owner) then
call fstarpu_data_unregister(xij%h)
deallocate(xij%c)
end if
end do
end do
deallocate(x%blocks)
end subroutine unregister_matrix
end program
This diff is collapsed.
This diff is collapsed.
......@@ -566,7 +566,7 @@ int main(int argc, char *argv[])
if (verbose) print_matrix(C,"Cinit");
if (verbose) print_matrix(Cwork,"Cwork");
}
// starpu_data_display_memory_stats();
starpu_data_display_memory_stats();
barrier_ret = starpu_mpi_barrier(MPI_COMM_WORLD);
start = starpu_timing_now();
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment