Commit 0ec80708 authored by Antoine Jego
Browse files

simpler handle registration; accounts properly for delayed registration and handle pruning

parent e0a5e9b2
......@@ -23,7 +23,6 @@ Matrix* alloc_matrix(int mb, int nb, int b, int p, int q, starpu_mpi_tag_t* tag)
X->blocks[i*nb+j].ld = b;
X->blocks[i*nb+j].tag= *tag;
X->blocks[i*nb+j].registered = 0;
X->blocks[i*nb+j].hdl = malloc(sizeof(starpu_data_handle_t));
if (X->blocks[i*nb+j].owner == comm_rank)
X->blocks[i*nb+j].c = malloc(b*b*sizeof(double));
else
......@@ -73,15 +72,16 @@ void register_matrix(Matrix* X, int mb, int nb, int datatype, int prune_handles,
// printf("[%d] X_%d,%d | tag:%d\n",comm_rank,b_row,b_col,Xij->tag);
if (Xij->owner == comm_rank)
{
Xij->hdl = malloc(sizeof(starpu_data_handle_t));
if (datatype) {
starpu_tile_register( Xij->hdl, STARPU_MAIN_RAM, Xij );
starpu_tile_register( &Xij->hdl, STARPU_MAIN_RAM, Xij );
} else {
starpu_matrix_data_register(Xij->hdl, STARPU_MAIN_RAM,
starpu_matrix_data_register( &Xij->hdl, STARPU_MAIN_RAM,
(uintptr_t) Xij->c, Xij->m, Xij->n, Xij->ld,
sizeof(double));
}
//printf("[%d] X_%d,%d | mpi_data_register %p %p\n",comm_rank,b_row,b_col,*Xij->hdl,Xij->hdl);
starpu_mpi_data_register(*Xij->hdl, Xij->tag, Xij->owner);
starpu_mpi_data_register(Xij->hdl, Xij->tag, Xij->owner);
// printf("[%d] X_%d,%d | mpi_data_register %p\n",comm_rank,b_row,b_col,Xij->hdl);
Xij->registered = 1;
} else if (!delay && (!prune_handles || (row && proc_row == b_row % p) ||
(col && proc_col == b_col % q) ||
......@@ -107,7 +107,7 @@ void unregister_matrix(Matrix* X, int mb, int nb)
// assuming we flush, we do not need to unregister everywhere
if (X->blocks[b_row*nb+b_col].owner == comm_rank) {
// printf("[%d] unregistering X_%d,%d\n", comm_rank, b_row, b_col);
starpu_data_unregister(*X->blocks[b_row*nb+b_col].hdl);
starpu_data_unregister(X->blocks[b_row*nb+b_col].hdl);
}
}
}
......@@ -136,18 +136,19 @@ void print_matrix(Matrix* X, char* name) {
// Register a single block with StarPU and StarPU-MPI, at most once.
//
// Xij      : block to register; its `registered` flag is read to decide
//            whether anything has to be done and is set to 1 afterwards.
// datatype : non-zero selects starpu_tile_register (custom tile interface),
//            zero selects starpu_matrix_data_register (m x n doubles, leading
//            dimension ld).
//
// Unlike the owner path, the data is registered with home node -1 and a NULL
// pointer, i.e. no local buffer is supplied here.
// NOTE(review): presumably StarPU then allocates the buffer on demand --
// confirm against the StarPU documentation.
//
// NOTE(review): this listing looks like a diff rendered without +/- markers:
// pairs of near-identical statements (pointer form `Xij->hdl` / `*Xij->hdl`
// vs. value form `&Xij->hdl` / `Xij->hdl`) appear to be the removed and the
// added version of the same line. Only one of each pair is expected to exist
// in the real file -- confirm against the repository.
void block_starpu_register(Block* Xij, int datatype) {
if (!Xij->registered) {
//printf("[%d] X_block | mpi_data_register %p %p\n",comm_rank,*Xij->hdl,Xij->hdl);
Xij->hdl = malloc(sizeof(starpu_data_handle_t));
// printf("[%d] X_block | mpi_data_register %p\n",comm_rank,Xij->hdl);
// Refresh the global comm_rank before registering.
starpu_mpi_comm_rank(MPI_COMM_WORLD, &comm_rank);
if (datatype) {
starpu_tile_register( Xij->hdl, -1, Xij );
starpu_tile_register( &Xij->hdl, -1, Xij );
} else {
starpu_matrix_data_register(Xij->hdl, -1,
starpu_matrix_data_register( &Xij->hdl, -1,
(uintptr_t) NULL, Xij->m, Xij->n, Xij->ld,
sizeof(double));
}
// Attach the block's MPI tag and owner rank to the freshly created handle.
starpu_mpi_data_register(*Xij->hdl, Xij->tag, Xij->owner);
starpu_mpi_data_register(Xij->hdl, Xij->tag, Xij->owner);
// Mark the block so that later calls take the no-op branch below.
Xij->registered = 1;
} else {
// Already registered: nothing to do.
//printf("[%d] X_block | already registered\n");
// printf("[%d] X_block | already registered\n");
}
}
......@@ -6,7 +6,7 @@ typedef struct Blocks
double* c;
int m,n,ld;
int owner;
starpu_data_handle_t* hdl;
starpu_data_handle_t hdl;
starpu_mpi_tag_t tag;
int registered;
} Block;
......
......@@ -244,7 +244,6 @@ struct cl_zgemm_args_s {
static void cpu_gemm(void *handles[], void *args)
{
if (verbose) printf("??? gemm_task\n");
struct cl_zgemm_args_s *clargs = (struct cl_zgemm_args_s *)args;
double *block_A;
......@@ -256,7 +255,6 @@ static void cpu_gemm(void *handles[], void *args)
unsigned ld_A;
unsigned ld_B;
unsigned ld_C;
if (verbose) printf("init gemm_task\n");
if (datatype) {
Block* A = ti_interface_get(handles[0]);
Block* B = ti_interface_get(handles[1]);
......@@ -281,7 +279,7 @@ static void cpu_gemm(void *handles[], void *args)
ld_B = STARPU_MATRIX_GET_LD(handles[1]);
ld_C = STARPU_MATRIX_GET_LD(handles[2]);
}
if (verbose) printf("gemm_task\n");
// if (verbose) printf("gemm_task\n");
// printf("DATA %d | ld A %d B %d C %d | alpha %f beta %f \n", datatype, ld_A, ld_B, ld_C, clargs->alpha, clargs->beta);
double start = starpu_timing_now();
cblas_dgemm( CblasRowMajor, CblasNoTrans, CblasNoTrans, // 2
......@@ -322,16 +320,19 @@ static void cpu_fill(void *handles[], void *arg)
static void cpu_copy(void *handles[], void *arg)
{
(void)arg;
// FIXME
double *block_A = (double *)STARPU_MATRIX_GET_PTR(handles[0]);
double *block_B = (double *)STARPU_MATRIX_GET_PTR(handles[1]);
double *block_A, *block_B;
unsigned n_col_A, n_row_A;
if (datatype) {
Block* tile = ti_interface_get(handles[0]);
n_col_A = tile->n;
n_row_A = tile->m;
Block* tile_A = ti_interface_get(handles[0]);
Block* tile_B = ti_interface_get(handles[1]);
block_A = tile_A->c;
block_B = tile_B->c;
n_col_A = tile_A->n;
n_row_A = tile_A->m;
} else {
block_A = (double *)STARPU_MATRIX_GET_PTR(handles[0]);
block_B = (double *)STARPU_MATRIX_GET_PTR(handles[1]);
n_col_A = STARPU_MATRIX_GET_NX(handles[0]);
n_row_A = STARPU_MATRIX_GET_NY(handles[0]);
}
......@@ -426,7 +427,7 @@ static void init_matrix(Matrix* X, int mb, int nb)
{
// printf("[%d] fill X_%d,%d %p\n",comm_rank,row,col, X->blocks[row*nb+col].hdl);
starpu_mpi_task_insert(MPI_COMM_WORLD, &fill_cl,
STARPU_W, *X->blocks[row*nb+col].hdl, 0);
STARPU_W, X->blocks[row*nb+col].hdl, 0);
// printf("[%d] filled X_%d,%d\n",comm_rank,row,col);
}
}
......@@ -444,8 +445,8 @@ static void copy_matrix(Matrix* A, Matrix* B)
|| B->blocks[row*A->nb+col].owner == comm_rank)
{
starpu_mpi_task_insert(MPI_COMM_WORLD, &copy_cl,
STARPU_W, *A->blocks[row*A->nb+col].hdl,
STARPU_R, *B->blocks[row*A->nb+col].hdl, 0);
STARPU_W, A->blocks[row*A->nb+col].hdl,
STARPU_R, B->blocks[row*A->nb+col].hdl, 0);
}
}
}
......@@ -629,6 +630,7 @@ int main(int argc, char *argv[])
if ((!super_prune || (c_local || (a_local && b_col <= Q) || (b_local && b_row <= P) )) &&
(!prune || (a_local || b_local || c_local))) {
if (delay) {
// printf("[%d] late registration i,j,l %d,%d,%d\n",comm_rank,b_row,b_col,b_aisle);
if (!prune_handles || c_local) {
block_starpu_register(& (A->blocks[b_row*KB+b_aisle]),datatype);
block_starpu_register(& (B->blocks[b_aisle*NB+b_col]),datatype);
......@@ -637,6 +639,7 @@ int main(int argc, char *argv[])
}
struct cl_zgemm_args_s *clargs = NULL;
if (c_local) {
// printf("[%d] executing %d,%d (l:%d)\n",comm_rank,b_row,b_col,b_aisle);
clargs = malloc(sizeof( struct cl_zgemm_args_s ));
clargs->alpha = alpha;
clargs->beta = b_aisle==0? beta : 1.0;
......@@ -644,9 +647,9 @@ int main(int argc, char *argv[])
starpu_mpi_task_insert(MPI_COMM_WORLD, &gemm_cl,
STARPU_CL_ARGS, clargs, sizeof(struct cl_zgemm_args_s),
//STARPU_R, *A->blocks[b_row*KB+b_aisle].hdl,
STARPU_R, *A->blocks[b_row*KB+b_aisle].hdl,
STARPU_R, *B->blocks[b_aisle*NB+b_col].hdl,
STARPU_RW, *C->blocks[b_row*NB+b_col].hdl, 0);
STARPU_R, A->blocks[b_row*KB+b_aisle].hdl,
STARPU_R, B->blocks[b_aisle*NB+b_col].hdl,
STARPU_RW,C->blocks[b_row * NB+b_col].hdl, 0);
//printf("[%d] inserted C_%d,%d += A_%d,%d B_%d,%d\n",comm_rank, b_row,b_col, b_row,b_aisle, b_aisle,b_col);
} else {
//printf("[%d] NOT inserted C_%d,%d += A_%d,%d B_%d,%d\n",comm_rank, b_row,b_col, b_row,b_aisle, b_aisle,b_col);
......@@ -656,12 +659,8 @@ int main(int argc, char *argv[])
if (flush) {
for (b_aisle=0;b_aisle<KB;b_aisle++)
{
if (A->blocks[b_row*KB+b_aisle].registered) {
starpu_mpi_cache_flush(MPI_COMM_WORLD, *A->blocks[b_row*KB+b_aisle].hdl);
// printf("[%d] flushed A_%d,%d\n",comm_rank,b_row,b_aisle);
} else {
// printf("[%d] NOT flushing A_%d,%d\n",comm_rank,b_row,b_aisle);
}
if (A->blocks[b_row*KB+b_aisle].registered)
starpu_mpi_cache_flush(MPI_COMM_WORLD, A->blocks[b_row*KB+b_aisle].hdl);
}
}
}
......@@ -703,9 +702,9 @@ int main(int argc, char *argv[])
clargs->beta = b_aisle==0? beta : 1.0;
starpu_mpi_task_insert(MPI_COMM_WORLD, &gemm_cl,
STARPU_CL_ARGS, clargs, sizeof(struct cl_zgemm_args_s),
STARPU_R, *Acheck->blocks[b_row*KB+b_aisle].hdl,
STARPU_R, *Bcheck->blocks[b_aisle*NB+b_col].hdl,
STARPU_RW, *Cwork->blocks[b_row*NB+b_col].hdl, 0);
STARPU_R, Acheck->blocks[b_row*KB+b_aisle].hdl,
STARPU_R, Bcheck->blocks[b_aisle*NB+b_col].hdl,
STARPU_RW, Cwork->blocks[b_row*NB+b_col].hdl, 0);
}
starpu_mpi_task_insert(MPI_COMM_WORLD, &nrm_cl,
STARPU_R, Ccheck->blocks[b_row*NB+b_col].hdl,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment