Commit e0a5e9b2 authored by Antoine Jego
Browse files

idk what i did, i hope it's working back anyway

parent 81d0709e
......@@ -22,10 +22,12 @@ Matrix* alloc_matrix(int mb, int nb, int b, int p, int q, starpu_mpi_tag_t* tag)
X->blocks[i*nb+j].n = b;
X->blocks[i*nb+j].ld = b;
X->blocks[i*nb+j].tag= *tag;
X->blocks[i*nb+j].hdl= malloc(sizeof(starpu_data_handle_t));
X->blocks[i*nb+j].registered = 0;
X->blocks[i*nb+j].hdl = malloc(sizeof(starpu_data_handle_t));
if (X->blocks[i*nb+j].owner == comm_rank)
X->blocks[i*nb+j].c = malloc(b*b*sizeof(double));
else
X->blocks[i*nb+j].c = NULL;
*tag = *tag + 1;
}
}
......@@ -43,8 +45,9 @@ void free_matrix(Matrix* X)
{
for (j= 0; j<X->nb; j++)
{
if (X->blocks[i*X->nb+j].owner == comm_rank)
if (X->blocks[i*X->nb+j].owner == comm_rank) {
free(X->blocks[i*X->nb+j].c);
}
}
}
free(X->blocks);
......@@ -61,7 +64,7 @@ void register_matrix(Matrix* X, int mb, int nb, int datatype, int prune_handles,
// comm_rank = proc_row * q + proc_col
proc_col = comm_rank % q;
proc_row = (comm_rank - proc_col)/q;
// printf("[%d] delayed ? %d\n", comm_rank, delay);
// printf("[%d] delayed ? %d / check ? %d / dt ? %d / pr_hd ? %d\n", comm_rank, delay, check, datatype, prune_handles);
for (b_row = 0; b_row < mb; b_row++)
{
for (b_col = 0; b_col < nb; b_col++)
......@@ -77,7 +80,7 @@ void register_matrix(Matrix* X, int mb, int nb, int datatype, int prune_handles,
(uintptr_t) Xij->c, Xij->m, Xij->n, Xij->ld,
sizeof(double));
}
// printf("[%d] X_%d,%d | mpi_data_register %p %p\n",comm_rank,b_row,b_col,*Xij->hdl,Xij->hdl);
//printf("[%d] X_%d,%d | mpi_data_register %p %p\n",comm_rank,b_row,b_col,*Xij->hdl,Xij->hdl);
starpu_mpi_data_register(*Xij->hdl, Xij->tag, Xij->owner);
Xij->registered = 1;
} else if (!delay && (!prune_handles || (row && proc_row == b_row % p) ||
......@@ -133,6 +136,7 @@ void print_matrix(Matrix* X, char* name) {
void block_starpu_register(Block* Xij, int datatype) {
if (!Xij->registered) {
//printf("[%d] X_block | mpi_data_register %p %p\n",comm_rank,*Xij->hdl,Xij->hdl);
starpu_mpi_comm_rank(MPI_COMM_WORLD, &comm_rank);
if (datatype) {
starpu_tile_register( Xij->hdl, -1, Xij );
......@@ -143,5 +147,7 @@ void block_starpu_register(Block* Xij, int datatype) {
}
starpu_mpi_data_register(*Xij->hdl, Xij->tag, Xij->owner);
Xij->registered = 1;
} else {
//printf("[%d] X_block | already registered\n");
}
}
......@@ -190,7 +190,6 @@ static Matrix *C = NULL; /* C will be partitioned as MB x NB blocks */
starpu_mpi_tag_t tag = 0;
static void alloc_matrices(void)
{
if (verbose) printf( "[%d] Allocating matrices\n", comm_rank);
......@@ -208,7 +207,7 @@ static void free_matrices(void)
}
/* Register the matrix blocks to StarPU and to StarPU-MPI */
static void register_matrices(int prune_handles)
static void register_matrices()
{
if (verbose) printf("[%d] Registering matrices\n", comm_rank);
if (datatype) {
......@@ -245,6 +244,7 @@ struct cl_zgemm_args_s {
static void cpu_gemm(void *handles[], void *args)
{
if (verbose) printf("??? gemm_task\n");
struct cl_zgemm_args_s *clargs = (struct cl_zgemm_args_s *)args;
double *block_A;
......@@ -256,6 +256,7 @@ static void cpu_gemm(void *handles[], void *args)
unsigned ld_A;
unsigned ld_B;
unsigned ld_C;
if (verbose) printf("init gemm_task\n");
if (datatype) {
Block* A = ti_interface_get(handles[0]);
Block* B = ti_interface_get(handles[1]);
......@@ -280,7 +281,7 @@ static void cpu_gemm(void *handles[], void *args)
ld_B = STARPU_MATRIX_GET_LD(handles[1]);
ld_C = STARPU_MATRIX_GET_LD(handles[2]);
}
// if (verbose) printf("gemm_task\n");
if (verbose) printf("gemm_task\n");
// printf("DATA %d | ld A %d B %d C %d | alpha %f beta %f \n", datatype, ld_A, ld_B, ld_C, clargs->alpha, clargs->beta);
double start = starpu_timing_now();
cblas_dgemm( CblasRowMajor, CblasNoTrans, CblasNoTrans, // 2
......@@ -288,7 +289,7 @@ static void cpu_gemm(void *handles[], void *args)
ld_B, clargs->beta, block_C, ld_C ); // 13
double stop = starpu_timing_now();
double timing = stop - start;
//printf("gemm_task %f Gflop/s\n", 2.0*n_row_C*n_col_C*n_col_A/(timing*1000));
// printf("gemm_task %f Gflop/s\n", 2.0*n_row_C*n_col_C*n_col_A/(timing*1000));
}
int iseed[4] = { 1,1,1,1 };
......@@ -581,6 +582,7 @@ int main(int argc, char *argv[])
int barrier_ret, trial;
double start, stop;
double alpha = 3.14, beta=0.42;
barrier_ret = starpu_mpi_barrier(MPI_COMM_WORLD);
if (trace) starpu_fxt_start_profiling();
unsigned ctx;
int* procs;
......@@ -594,9 +596,9 @@ int main(int argc, char *argv[])
for (trial =0; trial < T; trial++)
{
alloc_matrices();
register_matrices(prune_handles);
register_matrices();
init_matrices();
Matrix* Cwork;
if (check) {
Cwork = alloc_matrix(MB,NB,BS,1,1,&tag);
......@@ -628,10 +630,10 @@ int main(int argc, char *argv[])
(!prune || (a_local || b_local || c_local))) {
if (delay) {
if (!prune_handles || c_local) {
block_starpu_register(&A->blocks[b_row*KB+b_aisle],datatype);
block_starpu_register(&B->blocks[b_aisle*NB+b_col],datatype);
block_starpu_register(& (A->blocks[b_row*KB+b_aisle]),datatype);
block_starpu_register(& (B->blocks[b_aisle*NB+b_col]),datatype);
}
block_starpu_register(&C->blocks[b_row*NB+b_col], datatype);
block_starpu_register(& (C->blocks[b_row*NB+b_col]), datatype);
}
struct cl_zgemm_args_s *clargs = NULL;
if (c_local) {
......@@ -641,20 +643,29 @@ int main(int argc, char *argv[])
}
starpu_mpi_task_insert(MPI_COMM_WORLD, &gemm_cl,
STARPU_CL_ARGS, clargs, sizeof(struct cl_zgemm_args_s),
STARPU_R, *A->blocks[b_row*KB+b_aisle].hdl,
STARPU_R, *B->blocks[b_aisle*NB+b_col].hdl,
STARPU_RW, *C->blocks[b_row*NB+b_col].hdl, 0);
// printf("[%d] inserted C_%d,%d += A_%d,%d B_%d,%d\n",comm_rank, b_row,b_col, b_row,b_aisle, b_aisle,b_col);
//STARPU_R, *A->blocks[b_row*KB+b_aisle].hdl,
STARPU_R, *A->blocks[b_row*KB+b_aisle].hdl,
STARPU_R, *B->blocks[b_aisle*NB+b_col].hdl,
STARPU_RW, *C->blocks[b_row*NB+b_col].hdl, 0);
//printf("[%d] inserted C_%d,%d += A_%d,%d B_%d,%d\n",comm_rank, b_row,b_col, b_row,b_aisle, b_aisle,b_col);
} else {
//printf("[%d] NOT inserted C_%d,%d += A_%d,%d B_%d,%d\n",comm_rank, b_row,b_col, b_row,b_aisle, b_aisle,b_col);
}
}
}
if (flush) {
for (b_aisle=0;b_aisle<KB;b_aisle++)
{
if (A->blocks[b_row*KB+b_aisle].registered) starpu_mpi_cache_flush(MPI_COMM_WORLD, *A->blocks[b_row*KB+b_aisle].hdl);
if (A->blocks[b_row*KB+b_aisle].registered) {
starpu_mpi_cache_flush(MPI_COMM_WORLD, *A->blocks[b_row*KB+b_aisle].hdl);
// printf("[%d] flushed A_%d,%d\n",comm_rank,b_row,b_aisle);
} else {
// printf("[%d] NOT flushing A_%d,%d\n",comm_rank,b_row,b_aisle);
}
}
}
}
//printf("[%d] finished submission\n",comm_rank);
starpu_mpi_wait_for_all(MPI_COMM_WORLD);
barrier_ret = starpu_mpi_barrier(MPI_COMM_WORLD);
stop = starpu_timing_now();
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment