Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
solverstack
mini-examples
starpu_example_dgemm
Commits
0ec80708
Commit
0ec80708
authored
Dec 03, 2021
by
Antoine Jego
Browse files
simpler handle registration accounts properly for delay or pruning handles
parent
e0a5e9b2
Changes
3
Hide whitespace changes
Inline
Side-by-side
dsmat.c
View file @
0ec80708
...
...
@@ -23,7 +23,6 @@ Matrix* alloc_matrix(int mb, int nb, int b, int p, int q, starpu_mpi_tag_t* tag)
X
->
blocks
[
i
*
nb
+
j
].
ld
=
b
;
X
->
blocks
[
i
*
nb
+
j
].
tag
=
*
tag
;
X
->
blocks
[
i
*
nb
+
j
].
registered
=
0
;
X
->
blocks
[
i
*
nb
+
j
].
hdl
=
malloc
(
sizeof
(
starpu_data_handle_t
));
if
(
X
->
blocks
[
i
*
nb
+
j
].
owner
==
comm_rank
)
X
->
blocks
[
i
*
nb
+
j
].
c
=
malloc
(
b
*
b
*
sizeof
(
double
));
else
...
...
@@ -73,15 +72,16 @@ void register_matrix(Matrix* X, int mb, int nb, int datatype, int prune_handles,
// printf("[%d] X_%d,%d | tag:%d\n",comm_rank,b_row,b_col,Xij->tag);
if
(
Xij
->
owner
==
comm_rank
)
{
Xij
->
hdl
=
malloc
(
sizeof
(
starpu_data_handle_t
));
if
(
datatype
)
{
starpu_tile_register
(
Xij
->
hdl
,
STARPU_MAIN_RAM
,
Xij
);
starpu_tile_register
(
&
Xij
->
hdl
,
STARPU_MAIN_RAM
,
Xij
);
}
else
{
starpu_matrix_data_register
(
Xij
->
hdl
,
STARPU_MAIN_RAM
,
starpu_matrix_data_register
(
&
Xij
->
hdl
,
STARPU_MAIN_RAM
,
(
uintptr_t
)
Xij
->
c
,
Xij
->
m
,
Xij
->
n
,
Xij
->
ld
,
sizeof
(
double
));
}
//printf("[%d] X_%d,%d | mpi_data_register %p %p\n",comm_rank,b_row,b_col,*
Xij->hdl,Xij->
hdl
);
starpu_mpi_data_register
(
*
Xij
->
hdl
,
Xij
->
tag
,
Xij
->
owner
);
starpu_mpi_data_register
(
Xij
->
hdl
,
Xij
->
tag
,
Xij
->
owner
);
//
printf("[%d] X_%d,%d | mpi_data_register %p\n",comm_rank,b_row,b_col,Xij->hdl
);
Xij
->
registered
=
1
;
}
else
if
(
!
delay
&&
(
!
prune_handles
||
(
row
&&
proc_row
==
b_row
%
p
)
||
(
col
&&
proc_col
==
b_col
%
q
)
||
...
...
@@ -107,7 +107,7 @@ void unregister_matrix(Matrix* X, int mb, int nb)
// assuming we flush, we do not need to unregister everywhere
if
(
X
->
blocks
[
b_row
*
nb
+
b_col
].
owner
==
comm_rank
)
{
// printf("[%d] unregistering X_%d,%d\n", comm_rank, b_row, b_col);
starpu_data_unregister
(
*
X
->
blocks
[
b_row
*
nb
+
b_col
].
hdl
);
starpu_data_unregister
(
X
->
blocks
[
b_row
*
nb
+
b_col
].
hdl
);
}
}
}
...
...
@@ -136,18 +136,19 @@ void print_matrix(Matrix* X, char* name) {
void
block_starpu_register
(
Block
*
Xij
,
int
datatype
)
{
if
(
!
Xij
->
registered
)
{
//printf("[%d] X_block | mpi_data_register %p %p\n",comm_rank,*Xij->hdl,Xij->hdl);
Xij
->
hdl
=
malloc
(
sizeof
(
starpu_data_handle_t
));
// printf("[%d] X_block | mpi_data_register %p\n",comm_rank,Xij->hdl);
starpu_mpi_comm_rank
(
MPI_COMM_WORLD
,
&
comm_rank
);
if
(
datatype
)
{
starpu_tile_register
(
Xij
->
hdl
,
-
1
,
Xij
);
starpu_tile_register
(
&
Xij
->
hdl
,
-
1
,
Xij
);
}
else
{
starpu_matrix_data_register
(
Xij
->
hdl
,
-
1
,
starpu_matrix_data_register
(
&
Xij
->
hdl
,
-
1
,
(
uintptr_t
)
NULL
,
Xij
->
m
,
Xij
->
n
,
Xij
->
ld
,
sizeof
(
double
));
}
starpu_mpi_data_register
(
*
Xij
->
hdl
,
Xij
->
tag
,
Xij
->
owner
);
starpu_mpi_data_register
(
Xij
->
hdl
,
Xij
->
tag
,
Xij
->
owner
);
Xij
->
registered
=
1
;
}
else
{
//printf("[%d] X_block | already registered\n");
//
printf("[%d] X_block | already registered\n");
}
}
dsmat.h
View file @
0ec80708
...
...
@@ -6,7 +6,7 @@ typedef struct Blocks
double
*
c
;
int
m
,
n
,
ld
;
int
owner
;
starpu_data_handle_t
*
hdl
;
starpu_data_handle_t
hdl
;
starpu_mpi_tag_t
tag
;
int
registered
;
}
Block
;
...
...
starpu_example_dgemm.c
View file @
0ec80708
...
...
@@ -244,7 +244,6 @@ struct cl_zgemm_args_s {
static
void
cpu_gemm
(
void
*
handles
[],
void
*
args
)
{
if
(
verbose
)
printf
(
"??? gemm_task
\n
"
);
struct
cl_zgemm_args_s
*
clargs
=
(
struct
cl_zgemm_args_s
*
)
args
;
double
*
block_A
;
...
...
@@ -256,7 +255,6 @@ static void cpu_gemm(void *handles[], void *args)
unsigned
ld_A
;
unsigned
ld_B
;
unsigned
ld_C
;
if
(
verbose
)
printf
(
"init gemm_task
\n
"
);
if
(
datatype
)
{
Block
*
A
=
ti_interface_get
(
handles
[
0
]);
Block
*
B
=
ti_interface_get
(
handles
[
1
]);
...
...
@@ -281,7 +279,7 @@ static void cpu_gemm(void *handles[], void *args)
ld_B
=
STARPU_MATRIX_GET_LD
(
handles
[
1
]);
ld_C
=
STARPU_MATRIX_GET_LD
(
handles
[
2
]);
}
if
(
verbose
)
printf
(
"gemm_task
\n
"
);
//
if (verbose) printf("gemm_task\n");
// printf("DATA %d | ld A %d B %d C %d | alpha %f beta %f \n", datatype, ld_A, ld_B, ld_C, clargs->alpha, clargs->beta);
double
start
=
starpu_timing_now
();
cblas_dgemm
(
CblasRowMajor
,
CblasNoTrans
,
CblasNoTrans
,
// 2
...
...
@@ -322,16 +320,19 @@ static void cpu_fill(void *handles[], void *arg)
static
void
cpu_copy
(
void
*
handles
[],
void
*
arg
)
{
(
void
)
arg
;
// FIXME
double
*
block_A
=
(
double
*
)
STARPU_MATRIX_GET_PTR
(
handles
[
0
]);
double
*
block_B
=
(
double
*
)
STARPU_MATRIX_GET_PTR
(
handles
[
1
]);
double
*
block_A
,
*
block_B
;
unsigned
n_col_A
,
n_row_A
;
if
(
datatype
)
{
Block
*
tile
=
ti_interface_get
(
handles
[
0
]);
n_col_A
=
tile
->
n
;
n_row_A
=
tile
->
m
;
Block
*
tile_A
=
ti_interface_get
(
handles
[
0
]);
Block
*
tile_B
=
ti_interface_get
(
handles
[
1
]);
block_A
=
tile_A
->
c
;
block_B
=
tile_B
->
c
;
n_col_A
=
tile_A
->
n
;
n_row_A
=
tile_A
->
m
;
}
else
{
block_A
=
(
double
*
)
STARPU_MATRIX_GET_PTR
(
handles
[
0
]);
block_B
=
(
double
*
)
STARPU_MATRIX_GET_PTR
(
handles
[
1
]);
n_col_A
=
STARPU_MATRIX_GET_NX
(
handles
[
0
]);
n_row_A
=
STARPU_MATRIX_GET_NY
(
handles
[
0
]);
}
...
...
@@ -426,7 +427,7 @@ static void init_matrix(Matrix* X, int mb, int nb)
{
// printf("[%d] fill X_%d,%d %p\n",comm_rank,row,col, X->blocks[row*nb+col].hdl);
starpu_mpi_task_insert
(
MPI_COMM_WORLD
,
&
fill_cl
,
STARPU_W
,
*
X
->
blocks
[
row
*
nb
+
col
].
hdl
,
0
);
STARPU_W
,
X
->
blocks
[
row
*
nb
+
col
].
hdl
,
0
);
// printf("[%d] filled X_%d,%d\n",comm_rank,row,col);
}
}
...
...
@@ -444,8 +445,8 @@ static void copy_matrix(Matrix* A, Matrix* B)
||
B
->
blocks
[
row
*
A
->
nb
+
col
].
owner
==
comm_rank
)
{
starpu_mpi_task_insert
(
MPI_COMM_WORLD
,
&
copy_cl
,
STARPU_W
,
*
A
->
blocks
[
row
*
A
->
nb
+
col
].
hdl
,
STARPU_R
,
*
B
->
blocks
[
row
*
A
->
nb
+
col
].
hdl
,
0
);
STARPU_W
,
A
->
blocks
[
row
*
A
->
nb
+
col
].
hdl
,
STARPU_R
,
B
->
blocks
[
row
*
A
->
nb
+
col
].
hdl
,
0
);
}
}
}
...
...
@@ -629,6 +630,7 @@ int main(int argc, char *argv[])
if
((
!
super_prune
||
(
c_local
||
(
a_local
&&
b_col
<=
Q
)
||
(
b_local
&&
b_row
<=
P
)
))
&&
(
!
prune
||
(
a_local
||
b_local
||
c_local
)))
{
if
(
delay
)
{
// printf("[%d] late registration i,j,l %d,%d,%d\n",comm_rank,b_row,b_col,b_aisle);
if
(
!
prune_handles
||
c_local
)
{
block_starpu_register
(
&
(
A
->
blocks
[
b_row
*
KB
+
b_aisle
]),
datatype
);
block_starpu_register
(
&
(
B
->
blocks
[
b_aisle
*
NB
+
b_col
]),
datatype
);
...
...
@@ -637,6 +639,7 @@ int main(int argc, char *argv[])
}
struct
cl_zgemm_args_s
*
clargs
=
NULL
;
if
(
c_local
)
{
// printf("[%d] executing %d,%d (l:%d)\n",comm_rank,b_row,b_col,b_aisle);
clargs
=
malloc
(
sizeof
(
struct
cl_zgemm_args_s
));
clargs
->
alpha
=
alpha
;
clargs
->
beta
=
b_aisle
==
0
?
beta
:
1
.
0
;
...
...
@@ -644,9 +647,9 @@ int main(int argc, char *argv[])
starpu_mpi_task_insert
(
MPI_COMM_WORLD
,
&
gemm_cl
,
STARPU_CL_ARGS
,
clargs
,
sizeof
(
struct
cl_zgemm_args_s
),
//STARPU_R, *A->blocks[b_row*KB+b_aisle].hdl,
STARPU_R
,
*
A
->
blocks
[
b_row
*
KB
+
b_aisle
].
hdl
,
STARPU_R
,
*
B
->
blocks
[
b_aisle
*
NB
+
b_col
].
hdl
,
STARPU_RW
,
*
C
->
blocks
[
b_row
*
NB
+
b_col
].
hdl
,
0
);
STARPU_R
,
A
->
blocks
[
b_row
*
KB
+
b_aisle
].
hdl
,
STARPU_R
,
B
->
blocks
[
b_aisle
*
NB
+
b_col
].
hdl
,
STARPU_RW
,
C
->
blocks
[
b_row
*
NB
+
b_col
].
hdl
,
0
);
//printf("[%d] inserted C_%d,%d += A_%d,%d B_%d,%d\n",comm_rank, b_row,b_col, b_row,b_aisle, b_aisle,b_col);
}
else
{
//printf("[%d] NOT inserted C_%d,%d += A_%d,%d B_%d,%d\n",comm_rank, b_row,b_col, b_row,b_aisle, b_aisle,b_col);
...
...
@@ -656,12 +659,8 @@ int main(int argc, char *argv[])
if
(
flush
)
{
for
(
b_aisle
=
0
;
b_aisle
<
KB
;
b_aisle
++
)
{
if
(
A
->
blocks
[
b_row
*
KB
+
b_aisle
].
registered
)
{
starpu_mpi_cache_flush
(
MPI_COMM_WORLD
,
*
A
->
blocks
[
b_row
*
KB
+
b_aisle
].
hdl
);
// printf("[%d] flushed A_%d,%d\n",comm_rank,b_row,b_aisle);
}
else
{
// printf("[%d] NOT flushing A_%d,%d\n",comm_rank,b_row,b_aisle);
}
if
(
A
->
blocks
[
b_row
*
KB
+
b_aisle
].
registered
)
starpu_mpi_cache_flush
(
MPI_COMM_WORLD
,
A
->
blocks
[
b_row
*
KB
+
b_aisle
].
hdl
);
}
}
}
...
...
@@ -703,9 +702,9 @@ int main(int argc, char *argv[])
clargs
->
beta
=
b_aisle
==
0
?
beta
:
1
.
0
;
starpu_mpi_task_insert
(
MPI_COMM_WORLD
,
&
gemm_cl
,
STARPU_CL_ARGS
,
clargs
,
sizeof
(
struct
cl_zgemm_args_s
),
STARPU_R
,
*
Acheck
->
blocks
[
b_row
*
KB
+
b_aisle
].
hdl
,
STARPU_R
,
*
Bcheck
->
blocks
[
b_aisle
*
NB
+
b_col
].
hdl
,
STARPU_RW
,
*
Cwork
->
blocks
[
b_row
*
NB
+
b_col
].
hdl
,
0
);
STARPU_R
,
Acheck
->
blocks
[
b_row
*
KB
+
b_aisle
].
hdl
,
STARPU_R
,
Bcheck
->
blocks
[
b_aisle
*
NB
+
b_col
].
hdl
,
STARPU_RW
,
Cwork
->
blocks
[
b_row
*
NB
+
b_col
].
hdl
,
0
);
}
starpu_mpi_task_insert
(
MPI_COMM_WORLD
,
&
nrm_cl
,
STARPU_R
,
Ccheck
->
blocks
[
b_row
*
NB
+
b_col
].
hdl
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment