Mentions légales du service

Skip to content
Snippets Groups Projects
Commit d0a6fd5b authored by Antoine Jego :alembic:
Browse files

option to hand out a built context

parent bfc633de
No related branches found
No related tags found
No related merge requests found
......@@ -7,3 +7,8 @@ This is a mini-example of distributed gemm for starpu mpi, both in Fortran and C
make -j
make install
#+end_src
* TODO
- add warmup run
......@@ -39,6 +39,8 @@ program fstarpu_example_dgemm
logical :: super_prune = .false.
logical :: prune_handles = .false.
logical :: delay = .false.
logical :: provide_context = .false.
logical :: warmup = .true.
integer(c_int) :: comm_size, comm_rank
integer(c_int), target :: comm_world
......@@ -58,6 +60,11 @@ program fstarpu_example_dgemm
integer :: te, ts, tr
real :: tf, gflops
integer(c_int), dimension(:), allocatable :: procs
integer(c_int) :: ctx
integer(c_int),target :: arg_ctx
character(kind=c_char,len=4), target :: ctx_policy = C_CHAR_"lws"//C_NULL_CHAR
write(*,*) "initializing starpu ..."
ret = fstarpu_init(C_NULL_PTR)
if (ret == -19) then
......@@ -117,6 +124,11 @@ program fstarpu_example_dgemm
prune_handles = .true.
case('-d')
delay = .true.
case('-c')
provide_context = .true.
case('-now')
warmup = .false.
! keep -e as an empty argument for debug purpose
end select
end do
......@@ -133,12 +145,14 @@ program fstarpu_example_dgemm
write(*,'("mbxnbxkb = ",i5,"x",i5,"x",i5)') mb, nb, kb
write(*,'("B = ",i5)') bs
write(*,'("PxQ = ",i3,"x",i3)') p,q
if (trace) write(*,*) "(T)racing enabled"
if (lflush) write(*,*) "(F)lushing enabled"
if (super_prune) write(*,*) "(S)uper-pruning enabled"
if (prune) write(*,*) "(P)runing enabled"
if (prune_handles) write(*,*) "(H)andles pruning enabled"
if (delay) write(*,*) "(D)elayed handle registration enabled"
if (trace) write(*,*) "(T)racing enabled"
if (lflush) write(*,*) "(F)lushing enabled"
if (super_prune) write(*,*) "(S)uper-pruning enabled"
if (prune) write(*,*) "(P)runing enabled"
if (prune_handles) write(*,*) "(H)andles pruning enabled"
if (delay) write(*,*) "(D)elayed handle registration enabled"
if (provide_context) write(*,*) "(C)ontext provided at submission"
if (.not.warmup) write(*,*) "(W)armup disabled"
write(*,'("========================================")')
end if
ret = fstarpu_mpi_barrier(comm_world)
......@@ -148,6 +162,14 @@ program fstarpu_example_dgemm
alpha = 0.42
beta = 3.14
if (provide_context) then
allocate(procs(ncpu))
err = fstarpu_worker_get_ids_by_type(FSTARPU_CPU_WORKER, procs, ncpu)
ctx = fstarpu_sched_ctx_create(procs, ncpu, C_CHAR_"stdalone"//C_NULL_CHAR,&
(/ FSTARPU_SCHED_CTX_POLICY_NAME, c_loc(ctx_policy), c_null_ptr /) )
end if
if (warmup) t = t + 1
do trial=1,t
! allocate matrices
call initialize_matrix(A,mb,kb,"A",.true. ,.false.)
......@@ -186,13 +208,25 @@ program fstarpu_example_dgemm
call block_register(B,l,j)
call block_register(C,i,j)
end if
call fstarpu_mpi_task_insert((/ c_loc(comm_world), cl_mm, &
if (provide_context) then
arg_ctx = ctx
call fstarpu_mpi_task_insert((/ c_loc(comm_world), cl_mm, &
FSTARPU_VALUE, c_loc(alpha), FSTARPU_SZ_REAL8, &
FSTARPU_VALUE, c_loc(zbeta), FSTARPU_SZ_REAL8, &
FSTARPU_R, A%blocks(i,l)%h, &
FSTARPU_R, B%blocks(l,j)%h, &
FSTARPU_RW, C%blocks(i,j)%h, &
FSTARPU_SCHED_CTX, c_loc(arg_ctx), &
c_null_ptr /))
else
call fstarpu_mpi_task_insert((/ c_loc(comm_world), cl_mm, &
FSTARPU_VALUE, c_loc(alpha), FSTARPU_SZ_REAL8, &
FSTARPU_VALUE, c_loc(zbeta), FSTARPU_SZ_REAL8, &
FSTARPU_R, A%blocks(i,l)%h, &
FSTARPU_R, B%blocks(l,j)%h, &
FSTARPU_RW, C%blocks(i,j)%h, &
c_null_ptr /))
end if
else
!could write something
end if
......@@ -211,7 +245,8 @@ program fstarpu_example_dgemm
call system_clock(te,tr)
tf = max(real(te-ts)/real(tr),1e-20)
gflops = 2.0*m*n*k/(tf*10**9)
if (comm_rank.eq.0) write(*,'("RANK ",i3," -> took ",e15.8," s | ", e15.8," Gflop/s")') &
if (comm_rank.eq.0.and.(.not.warmup.or.trial.gt.1)) &
write(*,'("RANK ",i3," -> took ",e15.8," s | ", e15.8," Gflop/s")') &
comm_rank, tf, gflops
! unregister matrices
......@@ -220,6 +255,10 @@ program fstarpu_example_dgemm
call unregister_matrix(C,mb,nb)
end do
if (provide_context) then
call fstarpu_sched_ctx_delete(ctx)
deallocate(procs)
endif
call fstarpu_codelet_free(cl_mm)
call fstarpu_codelet_free(cl_fill)
......
......@@ -273,7 +273,7 @@ static void cpu_gemm(void *handles[], void *args)
// if (verbose) printf("gemm_task\n");
// printf("DATA %d | ld A %d B %d C %d | alpha %f beta %f \n", datatype, ld_A, ld_B, ld_C, clargs->alpha, clargs->beta);
double start = starpu_timing_now();
cblas_dgemm( CblasColMajor, CblasNoTrans, CblasNoTrans, // 2
cblas_dgemm( CblasRowMajor, CblasNoTrans, CblasNoTrans, // 2
n_row_C, n_col_C, n_col_A, clargs->alpha, block_A, ld_A, block_B, // 9
ld_B, clargs->beta, block_C, ld_C ); // 13
double stop = starpu_timing_now();
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment