Commit b915f268 authored by AGULLO Emmanuel's avatar AGULLO Emmanuel
Browse files

Merge branch 'master' of gitlab.inria.fr:solverstack/mini-examples/starpu_example_dgemm

parents 10561c0f 44b11fcf
......@@ -47,37 +47,48 @@ void free_matrix(Matrix* X, int mb, int nb)
free(X);
}
void register_matrix(Matrix* X, starpu_data_handle_t* X_h, starpu_mpi_tag_t *tag, int mb, int nb, int datatype)
void register_matrix(Matrix* X, starpu_data_handle_t* X_h, starpu_mpi_tag_t *tag, int mb, int nb, int datatype, int prune_handles, int p, int q, int row, int col)
{
starpu_mpi_comm_rank(MPI_COMM_WORLD, &comm_rank);
int proc_row, proc_col;
int b_row, b_col;
int owner;
Block* Xij;
// comm_rank = proc_row * q + proc_col
proc_col = comm_rank % q;
proc_row = (comm_rank - proc_col)/q;
for (b_row = 0; b_row < mb; b_row++)
{
for (b_col = 0; b_col < nb; b_col++)
{
if (X->blocks[b_row*nb+b_col].owner == comm_rank)
Xij = & X->blocks[b_row*nb+b_col];
//printf("[%d] X_%d,%d | tag:%d\n",comm_rank,b_row,b_col,*tag + b_row*nb + b_col);
if (Xij->owner == comm_rank)
{
if (datatype) {
starpu_tile_register( &X_h[b_row*nb+b_col], STARPU_MAIN_RAM, &X->blocks[b_row*nb+b_col] );
starpu_tile_register( &X_h[b_row*nb+b_col], STARPU_MAIN_RAM, Xij );
} else {
starpu_matrix_data_register(&X_h[b_row*nb+b_col], STARPU_MAIN_RAM,
(uintptr_t) X->blocks[b_row*nb+b_col].c, X->b, X->b, X->b,
(uintptr_t) Xij->c, X->b, X->b, X->b,
sizeof(double));
}
} else {
starpu_mpi_data_register(X_h[b_row*nb+b_col], (*tag + b_row*nb + b_col), Xij->owner);
} else if (!prune_handles || (row && proc_row == b_row % p) ||
(col && proc_col == b_col % q) ) {
if (datatype) {
starpu_tile_register( &X_h[b_row*nb+b_col], -1, &X->blocks[b_row*nb+b_col] );
starpu_tile_register( &X_h[b_row*nb+b_col], -1, Xij );
} else {
starpu_matrix_data_register(&X_h[b_row*nb+b_col], -1,
(uintptr_t) NULL, X->b, X->b, X->b,
sizeof(double));
}
starpu_mpi_data_register(X_h[b_row*nb+b_col], (*tag + b_row*nb + b_col), Xij->owner);
} else {
// printf("[%d] pruned X_%d,%d\n",comm_rank,b_row,b_col);
}
// printf("tag:%d\n",*tag);
starpu_mpi_data_register(X_h[b_row*nb+b_col], (*tag)++, X->blocks[b_row*nb+b_col].owner);
}
}
*tag = *tag + mb*nb;
}
void unregister_matrix(Matrix* X, starpu_data_handle_t* X_h, int mb, int nb)
......@@ -88,6 +99,7 @@ void unregister_matrix(Matrix* X, starpu_data_handle_t* X_h, int mb, int nb)
{
for (b_col = 0; b_col < nb; b_col++)
{
// assuming we flush, we do not need to unregister everywhere
if (X->blocks[b_row*nb+b_col].owner == comm_rank)
starpu_data_unregister(X_h[b_row*nb+b_col]);
}
......
......@@ -16,7 +16,7 @@ typedef struct Matrices
Matrix* alloc_matrix(int mb, int nb, int b, int p, int q);
void free_matrix(Matrix* X, int mb, int nb);
void register_matrix(Matrix* X, starpu_data_handle_t* X_h, starpu_mpi_tag_t *tag, int mb, int nb, int datatype);
void register_matrix(Matrix* X, starpu_data_handle_t* X_h, starpu_mpi_tag_t *tag, int mb, int nb, int datatype, int prune_handles, int p, int q, int row, int col);
void unregister_matrix(Matrix* X, starpu_data_handle_t* X_h, int mb, int nb);
......
#include <starpu.h>
#include <starpu_mpi.h>
#include <mpi.h>
#include "dsmat.h"
#include "optional_matrix_interface.h"
/**
 * Return the Block descriptor embedded in the main-RAM interface of a handle.
 *
 * When STARPU_DEBUG is defined, the interface id is checked against the id
 * stored in starpu_interface_matrix_ops before the descriptor is returned.
 *
 * @param handle  StarPU data handle to inspect.
 * @return Pointer to the Block stored inside the interface.
 */
static inline Block *
mi_handle_get( starpu_data_handle_t handle )
{
    void *raw = starpu_data_get_interface_on_node( handle, STARPU_MAIN_RAM );
    starpu_matrix_interface_t *iface = (starpu_matrix_interface_t *) raw;

#ifdef STARPU_DEBUG
    STARPU_ASSERT_MSG( iface->id == starpu_interface_matrix_ops.interfaceid,
                       "Error. The given data is not a _matrix." );
#endif

    return &(iface->matrix);
}
static void
mi_init( void *data_interface )
{
starpu_matrix_interface_t *matrix_interface = data_interface;
matrix_interface->id = starpu_interface_matrix_ops.interfaceid;
matrix_interface->allocsize = -1;
}
/**
 * Fill the per-node interfaces of a handle from a reference interface.
 *
 * Every node in [0, STARPU_MAXNODES) receives a byte copy of the reference
 * interface. On every node other than home_node the data pointer, the device
 * handle and the leading dimension are then reset (NULL, 0 and -1).
 *
 * @param handle          Handle being registered.
 * @param home_node       Node whose copy keeps the pointers of the reference.
 * @param data_interface  Reference interface (starpu_matrix_interface_t *).
 */
static void
mi_register_data_handle( starpu_data_handle_t handle,
                         unsigned home_node,
                         void *data_interface )
{
    const starpu_matrix_interface_t *reference =
        (const starpu_matrix_interface_t *) data_interface;
    unsigned node = 0;

    while ( node < STARPU_MAXNODES )
    {
        starpu_matrix_interface_t *replica = (starpu_matrix_interface_t *)
            starpu_data_get_interface_on_node( handle, node );

        memcpy( replica, reference, sizeof( starpu_matrix_interface_t ) );

        /* Only the home node owns valid storage at registration time. */
        if ( node != home_node )
        {
            replica->dev_handle = 0;
            replica->matrix.c   = NULL;
            replica->matrix.ld  = -1;
        }

        node++;
    }
}
static starpu_ssize_t
mi_allocate_data_on_node( void *data_interface, unsigned node )
{
uintptr_t addr = 0, handle;
starpu_matrix_interface_t *matrix_interface =
(starpu_matrix_interface_t *) data_interface;
uint32_t ld = matrix_interface->matrix.m;
starpu_ssize_t allocated_memory;
allocated_memory = matrix_interface->allocsize;
if ( allocated_memory <= 0 ) {
return 0;
}
handle = starpu_malloc_on_node( node, allocated_memory );
if ( !handle ) {
return -ENOMEM;
}
if ( starpu_node_get_kind(node) != STARPU_OPENCL_RAM ) {
addr = handle;
}
/* update the data properly */
matrix_interface->matrix.c = (void*)addr;
matrix_interface->matrix.ld = ld;
matrix_interface->dev_handle = handle;
return allocated_memory;
}
static void
mi_free_data_on_node( void *data_interface, unsigned node )
{
starpu_matrix_interface_t *matrix_interface =
(starpu_matrix_interface_t *) data_interface;
{
assert( (uintptr_t)(matrix_interface->matrix.c) == matrix_interface->dev_handle );
}
starpu_free_on_node( node, matrix_interface->dev_handle, matrix_interface->allocsize );
matrix_interface->matrix.c = NULL;
matrix_interface->dev_handle = 0;
}
static void *
mi_to_pointer( void *data_interface, unsigned node )
{
(void) node;
starpu_matrix_interface_t *matrix_interface = data_interface;
return (void*)(matrix_interface->matrix.c);
}
static int
mi_pointer_is_inside( void *data_interface, unsigned node, void *ptr )
{
(void) node;
starpu_matrix_interface_t *matrix_interface = data_interface;
char *begin = (char*) matrix_interface->matrix.c;
char *end = begin + matrix_interface->allocsize;
return ( (char*) ptr >= begin )
&& ( (char*) ptr < end );
}
/**
 * Return the logical size in bytes of the data behind a handle.
 *
 * Computed as m * n * sizeof(double) from the main-RAM interface.
 *
 * @param handle  Handle to inspect.
 * @return Size in bytes.
 */
static size_t
mi_get_size(starpu_data_handle_t handle)
{
    starpu_matrix_interface_t *iface =
        starpu_data_get_interface_on_node( handle, STARPU_MAIN_RAM );

    return iface->matrix.m * iface->matrix.n * sizeof(double);
}
/**
 * Return the allocation size recorded in the main-RAM interface of a handle.
 *
 * Asserts that the size has been defined, i.e. that it is not (size_t)-1.
 *
 * @param handle  Handle to inspect.
 * @return Allocation size in bytes.
 */
static size_t
mi_get_alloc_size(starpu_data_handle_t handle)
{
    starpu_matrix_interface_t *iface =
        starpu_data_get_interface_on_node( handle, STARPU_MAIN_RAM );
    size_t nbytes = iface->allocsize;

    STARPU_ASSERT_MSG( nbytes != (size_t)-1,
                       "The _matrix allocation size needs to be defined" );

    return nbytes;
}
/**
 * Compute the footprint of a handle from its dimensions.
 *
 * @param handle  Handle to inspect.
 * @return starpu_hash_crc32c_be() applied to m, seeded with n.
 */
static uint32_t
mi_footprint( starpu_data_handle_t handle )
{
    Block *blk = mi_handle_get( handle );

    return starpu_hash_crc32c_be( blk->m, blk->n );
}
/**
 * Compute the allocation footprint of a handle from its allocation size.
 *
 * @param handle  Handle to inspect.
 * @return starpu_hash_crc32c_be() applied to the allocation size, seeded with 0.
 */
static uint32_t
mi_alloc_footprint( starpu_data_handle_t handle )
{
    size_t nbytes = mi_handle_get_allocsize( handle );

    return starpu_hash_crc32c_be( nbytes, 0 );
}
static int
mi_compare( void *data_interface_a, void *data_interface_b )
{
starpu_matrix_interface_t *_matrix_a = (starpu_matrix_interface_t *) data_interface_a;
starpu_matrix_interface_t *_matrix_b = (starpu_matrix_interface_t *) data_interface_b;
/* Two matrices are considered compatible if they have the same size */
return ( _matrix_a->matrix.m == _matrix_b->matrix.m )
&& ( _matrix_a->matrix.n == _matrix_b->matrix.n );
}
static int
mi_alloc_compare(void *data_interface_a, void *data_interface_b)
{
starpu_matrix_interface_t *_matrix_a = (starpu_matrix_interface_t *) data_interface_a;
starpu_matrix_interface_t *_matrix_b = (starpu_matrix_interface_t *) data_interface_b;
/* Two matrices are considered compatible if they have the same allocated size */
return ( _matrix_a->allocsize == _matrix_b->allocsize );
}
/**
 * Print the dimensions of the main-RAM interface of a handle.
 *
 * Writes m and n, each followed by a tab, to the given stream.
 *
 * @param handle  Handle to display.
 * @param f       Output stream.
 */
static void
mi_display( starpu_data_handle_t handle, FILE *f )
{
    void *raw = starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
    starpu_matrix_interface_t *iface = (starpu_matrix_interface_t *) raw;

    fprintf( f, "%u\t%u\t", iface->matrix.m, iface->matrix.n );
}
/**
 * Copy the matrix payload of an interface into a contiguous buffer.
 *
 * When the leading dimension equals the number of rows, allocsize bytes are
 * copied in one call. Otherwise the n columns are copied one after the other,
 * m doubles each, advancing the source by ld doubles per column.
 *
 * @param matrix_interface  Interface describing the source matrix.
 * @param ptr               Destination buffer.
 * @return Always 0.
 */
static int
mi_pack_data_fullrank( starpu_matrix_interface_t *matrix_interface,
                       void *ptr )
{
    char *src = (void *)matrix_interface->matrix.c;
    char *dst = ptr;
    int   col;

    /* Contiguous storage: one bulk copy is enough */
    if ( matrix_interface->matrix.m == matrix_interface->matrix.ld ) {
        memcpy( ptr, src, matrix_interface->allocsize );
        return 0;
    }

    /* Strided storage: gather column by column */
    for ( col = 0; col < matrix_interface->matrix.n; col++ )
    {
        size_t col_bytes    = matrix_interface->matrix.m  * sizeof(double);
        size_t stride_bytes = matrix_interface->matrix.ld * sizeof(double);

        memcpy( dst, src, col_bytes );
        dst += col_bytes;
        src += stride_bytes;
    }
    return 0;
}
/**
 * Pack the matrix payload of a handle into a newly allocated buffer.
 *
 * The packed message is the raw, contiguous matrix payload of exactly
 * allocsize bytes, with no leading metadata. This is the layout that
 * mi_peek_data()/mi_unpack_data() consume: they assert that the received
 * count equals allocsize and read the payload from the start of the buffer.
 *
 * @param handle  Handle to pack; must be allocated on `node`.
 * @param node    Memory node holding the replicate to pack.
 * @param ptr     When non-NULL, receives a buffer allocated with
 *                starpu_malloc_on_node_flags() that holds the packed data.
 *                When NULL, only the size is computed.
 * @param count   Receives the packed size in bytes.
 * @return Always 0.
 */
static int
mi_pack_data( starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count )
{
    STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));

    starpu_matrix_interface_t *matrix_interface = (starpu_matrix_interface_t *)
        starpu_data_get_interface_on_node(handle, node);

    /* Payload only: must match the size asserted on the unpack side */
    *count = (starpu_ssize_t)(matrix_interface->allocsize);

    if ( ptr != NULL )
    {
        *ptr = (void *)starpu_malloc_on_node_flags( node, *count, 0 );

        /* Pack the real data */
        mi_pack_data_fullrank( matrix_interface, *ptr );
    }

    return 0;
}
/**
 * Copy a contiguous buffer into the matrix storage of an interface.
 *
 * The destination pointer must be non-NULL (asserted). When the leading
 * dimension equals the number of rows, allocsize bytes are copied in one call.
 * Otherwise the n columns are copied one after the other, m doubles each,
 * advancing the destination by ld doubles per column.
 *
 * @param matrix_interface  Interface describing the destination matrix.
 * @param ptr               Source buffer.
 * @return Always 0.
 */
static int
mi_unpack_data_fullrank( starpu_matrix_interface_t *matrix_interface,
                         void *ptr )
{
    char *dst = (void *)matrix_interface->matrix.c;
    char *src = ptr;
    int   col;

    assert( dst != NULL );

    /* Contiguous storage: one bulk copy is enough */
    if ( matrix_interface->matrix.m == matrix_interface->matrix.ld ) {
        memcpy( dst, ptr, matrix_interface->allocsize );
        return 0;
    }

    /* Strided storage: scatter column by column */
    for ( col = 0; col < matrix_interface->matrix.n; col++ )
    {
        size_t col_bytes    = matrix_interface->matrix.m  * sizeof(double);
        size_t stride_bytes = matrix_interface->matrix.ld * sizeof(double);

        memcpy( dst, src, col_bytes );
        src += col_bytes;
        dst += stride_bytes;
    }
    return 0;
}
/**
 * Copy received data into the replicate of a handle without freeing the buffer.
 *
 * The buffer is expected to hold exactly allocsize bytes of matrix payload
 * (asserted), which are copied into the storage of the interface on `node`.
 *
 * @param handle  Destination handle; must be allocated on `node`.
 * @param node    Memory node holding the destination replicate.
 * @param ptr     Received buffer.
 * @param count   Size of the received buffer in bytes.
 * @return Always 0.
 */
static int
mi_peek_data( starpu_data_handle_t handle, unsigned node, void *ptr, size_t count )
{
    STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));

    void *raw = starpu_data_get_interface_on_node(handle, node);
    starpu_matrix_interface_t *iface = (starpu_matrix_interface_t *) raw;

    /*
     * We may end up here if an early reception occurred before the handle of
     * the received data was registered. The datatype did not exist yet, so we
     * need to unpack the data ourselves.
     */
    STARPU_ASSERT( count == iface->allocsize );

    /* Unpack the real data */
    mi_unpack_data_fullrank( iface, ptr );

    return 0;
}
/**
 * Unpack received data into a handle, then release the reception buffer.
 *
 * @param handle  Destination handle.
 * @param node    Memory node holding the destination replicate and the buffer.
 * @param ptr     Received buffer; freed with starpu_free_on_node_flags().
 * @param count   Size of the received buffer in bytes.
 * @return Always 0.
 */
static int
mi_unpack_data( starpu_data_handle_t handle, unsigned node, void *ptr, size_t count )
{
    uintptr_t buffer = (uintptr_t)ptr;

    mi_peek_data( handle, node, ptr, count );

    /* The received buffer is no longer needed */
    starpu_free_on_node_flags( node, buffer, count, 0 );

    return 0;
}
static starpu_ssize_t
mi_describe( void *data_interface, char *buf, size_t size )
{
starpu_matrix_interface_t *matrix_interface = (starpu_matrix_interface_t *) data_interface;
return snprintf( buf, size, "M%ux%ux8",
(unsigned) matrix_interface->matrix.m,
(unsigned) matrix_interface->matrix.n);
}
/**
 * Copy the matrix payload from one interface replicate to another.
 *
 * The copy is delegated to starpu_interface_copy2d(): n blocks of
 * m * sizeof(double) bytes, with source and destination strides taken from
 * the respective leading dimensions. The transfer is then reported through
 * starpu_interface_data_copy().
 *
 * @param src_interface  Source interface (starpu_matrix_interface_t *).
 * @param src_node       Memory node of the source.
 * @param dst_interface  Destination interface (starpu_matrix_interface_t *).
 * @param dst_node       Memory node of the destination.
 * @param async_data     Opaque data forwarded to starpu_interface_copy2d().
 * @return 0, or -EAGAIN when starpu_interface_copy2d() returns non-zero.
 */
static int mi_copy_any_to_any( void *src_interface, unsigned src_node,
                               void *dst_interface, unsigned dst_node, void *async_data )
{
    starpu_matrix_interface_t *_matrix_src = (starpu_matrix_interface_t *) src_interface;
    starpu_matrix_interface_t *_matrix_dst = (starpu_matrix_interface_t *) dst_interface;
    size_t elemsize = sizeof(double);
    size_t m        = _matrix_src->matrix.m;
    size_t n        = _matrix_src->matrix.n;
    size_t ld_src   = _matrix_src->matrix.ld;
    size_t ld_dst   = _matrix_dst->matrix.ld;
    int    ret      = 0;

    /*
     * Copy between the data buffers themselves. Taking the address of the
     * `c` member would make the transfer read and write the interface
     * structures instead of the matrix storage.
     */
    void *src_mat = (void *)(_matrix_src->matrix.c);
    void *dst_mat = (void *)(_matrix_dst->matrix.c);

    /* Strides are expressed in bytes */
    ld_src *= elemsize;
    ld_dst *= elemsize;

    if (starpu_interface_copy2d( (uintptr_t) src_mat, 0, src_node,
                                 (uintptr_t) dst_mat, 0, dst_node,
                                 m * elemsize, n, ld_src, ld_dst, async_data ) ) {
        ret = -EAGAIN;
    }

    starpu_interface_data_copy( src_node, dst_node, (size_t) n*m*elemsize );

    return ret;
}
/**
 * Build and commit the MPI datatype describing a handle on a given node.
 *
 * The datatype is an MPI_Type_vector of n blocks of m * sizeof(double) bytes,
 * separated by a stride of ld * sizeof(double) bytes, over MPI_BYTE.
 *
 * @param handle    Handle to describe.
 * @param node      Memory node whose interface provides m, n and ld.
 * @param datatype  Receives the committed datatype.
 * @return Always 0; MPI failures trigger an assertion.
 */
int
mi_allocate_datatype_node( starpu_data_handle_t handle,
                           unsigned node,
                           MPI_Datatype *datatype )
{
    starpu_matrix_interface_t *iface = (starpu_matrix_interface_t *)
        starpu_data_get_interface_on_node( handle, node );
    size_t rows = iface->matrix.m;
    size_t cols = iface->matrix.n;
    size_t lead = iface->matrix.ld;
    size_t elemsize = sizeof(double);
    int rc;

    rc = MPI_Type_vector( cols, rows * elemsize, lead * elemsize, MPI_BYTE, datatype );
    STARPU_ASSERT_MSG(rc == MPI_SUCCESS, "MPI_Type_vector failed");

    rc = MPI_Type_commit( datatype );
    STARPU_ASSERT_MSG(rc == MPI_SUCCESS, "MPI_Type_commit failed");

    return 0;
}
/**
 * Build and commit the MPI datatype describing a handle in main RAM.
 *
 * @param handle    Handle to describe.
 * @param datatype  Receives the committed datatype.
 * @return The value returned by mi_allocate_datatype_node().
 */
int
mi_allocate_datatype( starpu_data_handle_t handle,
                      MPI_Datatype *datatype )
{
    int rc = mi_allocate_datatype_node( handle, STARPU_MAIN_RAM, datatype );

    return rc;
}
/**
 * Release an MPI datatype.
 *
 * The return value of MPI_Type_free() is not checked.
 *
 * @param datatype  Datatype to free.
 */
void
mi_free_datatype( MPI_Datatype *datatype )
{
    (void) MPI_Type_free( datatype );
}
/**
 * Return the m field of the Block attached to a handle.
 *
 * @param handle  Handle to inspect.
 * @return Value of m read through mi_handle_get().
 */
int
mi_handle_get_m( starpu_data_handle_t handle )
{
    return mi_handle_get( handle )->m;
}
/**
 * Return the n field of the Block attached to a handle.
 *
 * @param handle  Handle to inspect.
 * @return Value of n read through mi_handle_get().
 */
int
mi_handle_get_n( starpu_data_handle_t handle )
{
    return mi_handle_get( handle )->n;
}
/**
 * Return the ld field of the Block attached to a handle.
 *
 * @param handle  Handle to inspect.
 * @return Value of ld read through mi_handle_get().
 */
int
mi_handle_get_ld( starpu_data_handle_t handle )
{
    return mi_handle_get( handle )->ld;
}
/**
 * Return the allocation size recorded in the main-RAM interface of a handle.
 *
 * Unlike mi_get_alloc_size(), no check is made that the size is defined.
 *
 * @param handle  Handle to inspect.
 * @return Value of allocsize.
 */
size_t
mi_handle_get_allocsize( starpu_data_handle_t handle )
{
    void *raw = starpu_data_get_interface_on_node( handle, STARPU_MAIN_RAM );
    starpu_matrix_interface_t *iface = (starpu_matrix_interface_t *) raw;

    return iface->allocsize;
}
/* Copy methods of the interface: only the generic any-to-any copy is set. */
static const struct starpu_data_copy_methods mi_copy_methods =
{
.any_to_any = mi_copy_any_to_any,
};
/*
 * Operation table of the interface, wiring the mi_* callbacks defined in this
 * file. The interface id starts as STARPU_UNKNOWN_INTERFACE_ID and is assigned
 * by starpu_matrix_interface_register(). The peek_data callback is only set
 * when HAVE_STARPU_DATA_PEEK is defined.
 * NOTE(review): StarPU's public headers appear to declare a symbol with this
 * same name for the built-in matrix interface -- confirm there is no clash.
 */
struct starpu_data_interface_ops starpu_interface_matrix_ops =
{
.init = mi_init,
.register_data_handle = mi_register_data_handle,
.allocate_data_on_node = mi_allocate_data_on_node,
.free_data_on_node = mi_free_data_on_node,
.to_pointer = mi_to_pointer,
.pointer_is_inside = mi_pointer_is_inside,
.get_size = mi_get_size,
.get_alloc_size = mi_get_alloc_size,
.footprint = mi_footprint,
.alloc_footprint = mi_alloc_footprint,
.compare = mi_compare,
.alloc_compare = mi_alloc_compare,
.display = mi_display,
#if defined (HAVE_STARPU_DATA_PEEK)
.peek_data = mi_peek_data,
#endif
.pack_data = mi_pack_data,
.unpack_data = mi_unpack_data,
.describe = mi_describe,
.copy_methods =&mi_copy_methods,
.interfaceid = STARPU_UNKNOWN_INTERFACE_ID,
.interface_size = sizeof(starpu_matrix_interface_t),
.name = "STARPU_TILE_INTERFACE"
};
/**
 * Register a Block with StarPU through the custom interface.
 *
 * A temporary interface is filled with the interface id, the data pointer of
 * the block as device handle, m * n * sizeof(double) as both allocation size
 * and matrix size, and a byte copy of the Block itself. It is then handed to
 * starpu_data_register().
 *
 * NOTE(review): the accompanying header declares starpu_my_matrix_register()
 * and register_matrix() calls starpu_tile_register() -- confirm which name is
 * intended.
 *
 * @param handleptr  Receives the new handle.
 * @param home_node  Home node passed to starpu_data_register().
 * @param matrix     Block to register.
 */
void
starpu_matrix_register( starpu_data_handle_t *handleptr,
                        int home_node,
                        Block *matrix)
{
    const size_t nbytes = matrix->m * matrix->n * sizeof(double);
    starpu_matrix_interface_t iface =
    {
        .id         = starpu_interface_matrix_ops.interfaceid,
        .dev_handle = (intptr_t)(matrix->c),
        .allocsize  = nbytes,
        .matrixsize = nbytes,
    };

    /* Embed a copy of the descriptor in the interface */
    memcpy( &(iface.matrix), matrix, sizeof( Block ) );

    starpu_data_register( handleptr, home_node, &iface, &starpu_interface_matrix_ops );
}
/**
 * Assign an id to the interface and register its MPI datatype callbacks.
 *
 * Does nothing when the interface already has an id. Otherwise the next free
 * id is taken and the datatype allocation/free callbacks are registered,
 * using the per-node variant when
 * HAVE_STARPU_MPI_INTERFACE_DATATYPE_NODE_REGISTER is defined.
 */
void
starpu_matrix_interface_register()
{
    if ( starpu_interface_matrix_ops.interfaceid != STARPU_UNKNOWN_INTERFACE_ID ) {
        return;
    }

    starpu_interface_matrix_ops.interfaceid = starpu_data_interface_get_next_id();

#if defined (HAVE_STARPU_MPI_INTERFACE_DATATYPE_NODE_REGISTER)
    starpu_mpi_interface_datatype_node_register( starpu_interface_matrix_ops.interfaceid,
                                                 mi_allocate_datatype_node,
                                                 mi_free_datatype );
#else
    starpu_mpi_interface_datatype_register( starpu_interface_matrix_ops.interfaceid,
                                            mi_allocate_datatype,
                                            mi_free_datatype );
#endif
}
/**
 * Unregister the MPI datatype callbacks of the interface.
 *
 * Does nothing when the interface has no id. The interface id itself is left
 * unchanged.
 */
void
starpu_matrix_interface_unregister()
{
    if ( starpu_interface_matrix_ops.interfaceid == STARPU_UNKNOWN_INTERFACE_ID ) {
        return;
    }

    starpu_mpi_interface_datatype_unregister( starpu_interface_matrix_ops.interfaceid );
}
#ifndef TILE_INTERFACE_STANDALONE_H
#define TILE_INTERFACE_STANDALONE_H
#include <starpu.h>
#include "dsmat.h"
extern struct starpu_data_interface_ops starpu_interface_matrix_ops;
/**
 * Per-node replicate of the custom matrix data interface.
 *
 * The Block descriptor is stored by value. The implementation reads its
 * fields with `matrix.m` / `matrix.n` / `matrix.ld` / `matrix.c`, returns
 * `&iface->matrix` as a `Block *`, and fills it with
 * `memcpy(&iface.matrix, block, sizeof(Block))`. None of these are valid with
 * a pointer member.
 */
typedef struct starpu_matrix_interface_s
{
    enum starpu_data_interface_id id; /**< Identifier of the interface */
    uintptr_t dev_handle;             /**< device handle of the matrix */
    size_t    allocsize;              /**< size actually currently allocated */
    size_t    matrixsize;             /**< size of the elements of the matrix */
    Block     matrix;                 /**< embedded copy of the block descriptor */
} starpu_matrix_interface_t;
void starpu_my_matrix_register( starpu_data_handle_t *handleptr,
int home_node,
Block *matrix);
int mi_handle_get_m ( starpu_data_handle_t handle );
int mi_handle_get_n ( starpu_data_handle_t handle );
size_t mi_handle_get_allocsize( starpu_data_handle_t handle );
/**
 * Return the address of the `matrix` member of an interface.
 *
 * @param interface  Interface to read.
 * @return Address of interface->matrix.
 */
static inline Block* mi_interface_get(starpu_matrix_interface_t *interface)
{
    return &(interface->matrix);
}
int mi_allocate_datatype( starpu_data_handle_t handle,
MPI_Datatype *datatype );
void mi_free_datatype( MPI_Datatype *datatype );
void starpu_matrix_interface_register();
void starpu_matrix_interface_unregister();
#endif
......@@ -44,22 +44,23 @@
//const char *argp_program_version = "standalone 0.2";