Commit 0f26a756 authored by Antoine Jego's avatar Antoine Jego
Browse files

fixed handle pruning: calloc block instead of malloc

parent 45797b6b
cmake_minimum_required (VERSION 3.3)
project(starpu_example_dgemm C Fortran)
add_compile_options(-Wall -Wextra -pedantic) # -Werror)
# Check that we do not try to configure/build inside the source directory
# ----------------------------------------------------------------------
if( ${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR} )
......
......@@ -10,8 +10,8 @@ Matrix* alloc_matrix(int mb, int nb, int b, int p, int q, starpu_mpi_tag_t* tag)
{
starpu_mpi_comm_rank(MPI_COMM_WORLD, &comm_rank);
Matrix* X;
X = malloc(sizeof(Matrix));
X->blocks = malloc( mb*nb*sizeof(Block));
X = calloc(1, sizeof(Matrix));
X->blocks = calloc( mb*nb,sizeof(Block));
int i,j;
// printf("[%d] allocating %d x %d B %d on %dx%d\n", comm_rank, mb, nb, b, p, q);
for (i = 0; i<mb; i++)
......@@ -24,7 +24,7 @@ Matrix* alloc_matrix(int mb, int nb, int b, int p, int q, starpu_mpi_tag_t* tag)
X->blocks[i*nb+j].ld = b;
X->blocks[i*nb+j].tag= *tag;
X->blocks[i*nb+j].registered = 0;
X->blocks[i*nb+j].hdl = NULL;
//X->blocks[i*nb+j].hdl = NULL;
if (X->blocks[i*nb+j].owner == comm_rank)
X->blocks[i*nb+j].c = malloc(b*b*sizeof(double));
else
......@@ -42,12 +42,15 @@ void free_matrix(Matrix* X)
{
starpu_mpi_comm_rank(MPI_COMM_WORLD, &comm_rank);
int i,j;
for (i = 0; i<X->mb; i++)
Block* Xij;
int mb = X->mb, nb = X->nb;
for (i = 0; i<mb; i++)
{
for (j= 0; j<X->nb; j++)
for (j= 0; j<nb; j++)
{
if (X->blocks[i*X->nb+j].owner == comm_rank) {
free(X->blocks[i*X->nb+j].c);
Xij = & X->blocks[i*nb + j];
if (Xij->owner == comm_rank) {
free(Xij->c);
}
}
}
......@@ -60,7 +63,6 @@ void register_matrix(Matrix* X, int mb, int nb, char* name, int datatype, int pr
starpu_mpi_comm_rank(MPI_COMM_WORLD, &comm_rank);
int proc_row, proc_col;
int b_row, b_col;
int owner;
Block* Xij;
// comm_rank = proc_row * q + proc_col
proc_col = comm_rank % q;
......@@ -72,27 +74,26 @@ void register_matrix(Matrix* X, int mb, int nb, char* name, int datatype, int pr
for (b_col = 0; b_col < nb; b_col++)
{
Xij = & X->blocks[b_row*nb+b_col];
Xij->hdl = malloc(sizeof(starpu_data_handle_t));
// printf("[%d] %s_%d,%d=%dx%d | tag:%d | owned by %d\n",comm_rank,name,b_row,b_col,b_row%p,b_col%q,Xij->tag,Xij->owner);
if (Xij->owner == comm_rank)
{
// printf("[%d] %s_%d,%d=%dx%d (%p) | tag:%ld | my registration \n",comm_rank,name,b_row,b_col,b_row%p,b_col%q,Xij->hdl,Xij->tag);
if (datatype) {
starpu_tile_register( Xij->hdl, STARPU_MAIN_RAM, Xij );
starpu_tile_register( &Xij->hdl, STARPU_MAIN_RAM, Xij );
} else {
starpu_matrix_data_register( Xij->hdl, STARPU_MAIN_RAM,
starpu_matrix_data_register( &Xij->hdl, STARPU_MAIN_RAM,
(uintptr_t) Xij->c, Xij->m, Xij->n, Xij->ld,
sizeof(double));
}
starpu_mpi_data_register(*Xij->hdl, Xij->tag, Xij->owner);
// printf("[%d] X_%d,%d | mpi_data_register %p\n",comm_rank,b_row,b_col,*Xij->hdl);
starpu_mpi_data_register(Xij->hdl, Xij->tag, Xij->owner);
Xij->registered = 1;
} else if (!delay && (!prune_handles || (row && proc_row == b_row % p) ||
(col && proc_col == b_col % q) ||
(check && Xij->owner == 0) ||
(check && comm_rank == 0)) ) {
// printf("[%d] %s_%d,%d=%dx%d (%p) | tag:%ld | registered for %d\n",comm_rank,name,b_row,b_col,b_row%p,b_col%q,Xij->hdl,Xij->tag,Xij->owner);
block_starpu_register(Xij, datatype);
} else {
// printf("[%d] pruned %s_%d,%d\n",comm_rank,name,b_row,b_col);
// printf("[%d] %s_%d,%d=%dx%d (%p) | tag:%ld | owned by %d - I don't register\n",comm_rank,name,b_row,b_col,b_row%p,b_col%q,Xij->hdl,Xij->tag,Xij->owner);
}
}
}
......@@ -103,23 +104,26 @@ void unregister_matrix(Matrix* X, int mb, int nb)
starpu_mpi_comm_rank(MPI_COMM_WORLD, &comm_rank);
// printf("[%d]unregistering %dx%d matrix\n", comm_rank, mb, nb);
int b_row,b_col;
Block* Xij;
for (b_row = 0; b_row < mb; b_row++)
{
for (b_col = 0; b_col < nb; b_col++)
{
// assuming we flush, we do not need to unregister everywhere
if (X->blocks[b_row*nb+b_col].owner == comm_rank) {
Xij = & X->blocks[b_row*nb + b_col];
if (Xij->owner == comm_rank) {
// printf("[%d] unregistering X_%d,%d\n", comm_rank, b_row, b_col);
starpu_data_unregister(*X->blocks[b_row*nb+b_col].hdl);
starpu_data_unregister(Xij->hdl);
}
free(X->blocks[b_row*nb+b_col].hdl);
X->blocks[b_row*nb+b_col].registered = 0;
/* free(X->blocks[b_row*nb+b_col].hdl);
X->blocks[b_row*nb+b_col].hdl = NULL;
*/ Xij->registered = 0;
}
}
}
void print_block(Block* X, int b, int i, int j, char* name) {
void print_block(Block* X, int i, int j, char* name) {
starpu_mpi_comm_rank(MPI_COMM_WORLD, &comm_rank);
int b_row,b_col;
for (b_row = 0; b_row < X->m; b_row++) {
......@@ -134,7 +138,7 @@ void print_matrix(Matrix* X, char* name) {
for (i = 0 ; i < X->mb ; i++) {
for (j = 0 ; j < X->nb ; j++) {
if (comm_rank == X->blocks[i*X->nb+j].owner)
print_block(&X->blocks[i*X->nb + j], X->b, i, j, name);
print_block(&X->blocks[i*X->nb + j], i, j, name);
}
}
}
......@@ -144,13 +148,13 @@ void block_starpu_register(Block* Xij, int datatype) {
starpu_mpi_comm_rank(MPI_COMM_WORLD, &comm_rank);
// Xij->hdl = malloc(sizeof(starpu_data_handle_t));
if (datatype) {
starpu_tile_register( Xij->hdl, -1, Xij );
starpu_tile_register( &Xij->hdl, -1, Xij );
} else {
starpu_matrix_data_register( Xij->hdl, -1,
starpu_matrix_data_register( &Xij->hdl, -1,
(uintptr_t) NULL, Xij->m, Xij->n, Xij->ld,
sizeof(double));
}
starpu_mpi_data_register(*Xij->hdl, Xij->tag, Xij->owner);
starpu_mpi_data_register(Xij->hdl, Xij->tag, Xij->owner);
// printf("[%d] X_block | mpi_data_register %p\n",comm_rank,*Xij->hdl);
Xij->registered = 1;
} else {
......
......@@ -6,7 +6,8 @@ typedef struct Blocks
double* c;
int m,n,ld;
int owner;
starpu_data_handle_t* hdl;
//starpu_data_handle_t* hdl;
starpu_data_handle_t hdl;
starpu_mpi_tag_t tag;
int registered;
} Block;
......
......@@ -43,9 +43,9 @@
//const char *argp_program_version = "standalone 0.2";
//const char *argp_program_bug_address
static char doc[] = "Standalone DGEMM using StarPU-MPI";
static char args_doc[] = "-m [m] -n [n] -k [k] -b [b] -p [p] -q [q] --niter [l] [--check] [--trace] [--datatype] [--mpi-thread [t]] [--no-flush] [--prune] [--prune-handles] [--super-prune]";;
static char args_doc[] = "-m [m] -n [n] -k [k] -b [b] -p [p] -q [q] --niter [l] [--check] [--trace] [--datatype] [--mpi-thread [t]] [--no-flush] [--prune] [--prune-handles] [--super-prune]";
static struct argp_option options[] = {
{"m", 'm', "int", 0, "Number of rows in A and C (deprecated)" },
{"m", 'm', "int", 0, "Number of rows in A and C (deprecated)" },
{"n", 'n', "int", 0, "Dimension of A B and C" },
{"k", 'k', "int", 0, "Shared dimension of A and B (deprecated)" },
{"blocking", 'b', "int", 0, "Size of the square block of A, B and C (must divide m,n and k" },
......@@ -204,6 +204,7 @@ static void free_matrices(void)
free_matrix(A);
free_matrix(B);
free_matrix(C);
if (verbose) printf( "[%d] Freed matrices\n", comm_rank);
}
/* Register the matrix blocks to StarPU and to StarPU-MPI */
......@@ -428,7 +429,7 @@ static void init_matrix(Matrix* X, int mb, int nb)
{
// printf("[%d] fill X_%d,%d %p\n",comm_rank,row,col, X->blocks[row*nb+col].hdl);
starpu_mpi_task_insert(MPI_COMM_WORLD, &fill_cl,
STARPU_W, *X->blocks[row*nb+col].hdl, 0);
STARPU_W, X->blocks[row*nb+col].hdl, 0);
// printf("[%d] filled X_%d,%d\n",comm_rank,row,col);
}
}
......@@ -644,22 +645,24 @@ int main(int argc, char *argv[])
}
struct cl_zgemm_args_s *clargs = NULL;
if (c_local) {
if (verbose) printf("[%d] executing %d,%d (l:%d)\n",comm_rank,b_row,b_col,b_aisle);
if (verbose) printf("[%d] exec. C_%d,%d (%d-%p-%d) += A_%d,%d (%d-%p-%d) B_%d,%d (%d-%p-%d)\n", comm_rank,
b_row,b_col, Cij->registered,Cij->hdl,Cij->owner,
b_row,b_aisle,Ail->registered,Ail->hdl,Ail->owner,
b_aisle,b_col,Blj->registered,Blj->hdl,Blj->owner);
clargs = malloc(sizeof( struct cl_zgemm_args_s ));
clargs->alpha = alpha;
clargs->beta = b_aisle==0? beta : 1.0;
} else if (verbose){
printf("[%d] inserting %d,%d (l:%d)\n",comm_rank,b_row,b_col,b_aisle);
printf("[%d] insert. C_%d,%d (%d-%p-%d) += A_%d,%d (%d-%p-%d) B_%d,%d (%d-%p-%d)\n", comm_rank,
b_row,b_col, Cij->registered,Cij->hdl,Cij->owner,
b_row,b_aisle,Ail->registered,Ail->hdl,Ail->owner,
b_aisle,b_col,Blj->registered,Blj->hdl,Blj->owner);
}
if (verbose) printf("[%d] C_%d,%d (%d-%p-%d) += A_%d,%d (%d-%p-%d) B_%d,%d (%d-%p-%d)\n", comm_rank,
b_row,b_col, Cij->registered,Cij->hdl,Cij->owner,
b_row,b_aisle,Ail->registered,Ail->hdl,Ail->owner,
b_aisle,b_col,Blj->registered,Blj->hdl,Blj->owner);
starpu_mpi_task_insert(MPI_COMM_WORLD, &gemm_cl,
STARPU_CL_ARGS, clargs, sizeof(struct cl_zgemm_args_s),
STARPU_R, *Ail->hdl,
STARPU_R, *Blj->hdl,
STARPU_RW,*Cij->hdl, 0);
STARPU_R, Ail->hdl,
STARPU_R, Blj->hdl,
STARPU_RW,Cij->hdl, 0);
} else {
// printf("[%d] NOT inserted C_%d,%d += A_%d,%d B_%d,%d\n",comm_rank, b_row,b_col, b_row,b_aisle, b_aisle,b_col);
}
......@@ -670,7 +673,7 @@ int main(int argc, char *argv[])
{
Ail = & A->blocks[b_row*KB + b_aisle];
if (Ail->registered)
starpu_mpi_cache_flush(MPI_COMM_WORLD, *Ail->hdl);
starpu_mpi_cache_flush(MPI_COMM_WORLD, Ail->hdl);
}
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment