Mentions légales du service

Skip to content
Snippets Groups Projects
runtime_descriptor.c 13.1 KiB
Newer Older
 * @copyright 2009-2014 The University of Tennessee and The University of
Mathieu Faverge's avatar
Mathieu Faverge committed
 *                      Tennessee Research Foundation. All rights reserved.
 * @copyright 2012-2017 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
 *                      Univ. Bordeaux. All rights reserved.
 ***
 *
 * @brief Chameleon StarPU descriptor routines
Mathieu Faverge's avatar
Mathieu Faverge committed
 * @version 1.0.0
 * @author Cedric Augonnet
 * @author Mathieu Faverge
 * @author Cedric Castagnede
 * @date 2010-11-15
 *
#include <unistd.h>
#include "chameleon_starpu.h"
#if defined(CHAMELEON_USE_MPI)
/*
 * MPI tag layout: by default 24 bits are taken for the tile id and 7 bits for
 * the descriptor id (31 bits in total).
 * These values can be changed through the call
 * MORSE_user_tag_size(int tag_width, int tag_sep).
 */
/* Smallest total tag width accepted after adjusting to MPI_TAG_UB */
#define TAG_WIDTH_MIN 20
/* Total number of bits used in an MPI tag */
static int tag_width = 31;
/* Number of low-order bits holding the tile index (the descriptor id is shifted by it) */
static int tag_sep   = 24;
/* Set to 1 once chameleon_starpu_tag_init() has completed successfully */
static int _tag_mpi_initialized_ = 0;
Mathieu Faverge's avatar
Mathieu Faverge committed
/**
 * Initialize the MPI tag layout used to identify tiles.
 *
 * On the first call, the requested sizes are stored in the file-scope
 * tag_width / tag_sep variables, MPI_TAG_UB is queried on MPI_COMM_WORLD, and
 * both values are decreased one bit at a time until (2^tag_width - 1) no
 * longer exceeds the upper bound or tag_width drops below TAG_WIDTH_MIN.
 * When an error is returned after the query, tag_width / tag_sep keep the
 * values reached so far and the layout is not marked as initialized.
 *
 * @param[in] user_tag_width  Requested total number of bits in a tag.
 * @param[in] user_tag_sep    Requested number of low-order bits of the tag.
 *
 * @retval MORSE_SUCCESS               the layout has been set.
 * @retval MORSE_ERR_REINITIALIZED     the layout was already set; nothing is
 *                                     modified.
 * @retval MORSE_ERR_OUT_OF_RESOURCES  MPI_TAG_UB could not be queried, or it
 *                                     is too small for TAG_WIDTH_MIN bits.
 */
static inline int
chameleon_starpu_tag_init( int user_tag_width,
                           int user_tag_sep )
{
    int *tag_ub = NULL;
    int  ok     = 0;

    /* The layout can only be chosen once */
    if ( _tag_mpi_initialized_ ) {
        return MORSE_ERR_REINITIALIZED;
    }

    tag_width = user_tag_width;
    tag_sep   = user_tag_sep;

#if defined(HAVE_STARPU_MPI_COMM_GET_ATTR)
    starpu_mpi_comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &tag_ub, &ok);
#else
    MPI_Comm_get_attr(MPI_COMM_WORLD, MPI_TAG_UB, &tag_ub, &ok);
#endif

    /* Without a valid attribute, tag_ub must not be dereferenced below */
    if ( !ok || (tag_ub == NULL) ) {
        morse_error("RUNTIME_desc_create", "MPI_TAG_UB not known by StarPU");
        return MORSE_ERR_OUT_OF_RESOURCES;
    }

    /* Shrink the tag until its largest value fits under MPI_TAG_UB */
    while ( ((uintptr_t)((1UL<<tag_width) - 1) > (uintptr_t)(*tag_ub) ) &&
            (tag_width >= TAG_WIDTH_MIN) )
    {
        tag_width--;
        tag_sep--;
    }

    if ( tag_width < TAG_WIDTH_MIN ) {
        morse_error("RUNTIME_desc_create", "MPI_TAG_UB too small to identify all the data");
        return MORSE_ERR_OUT_OF_RESOURCES;
    }

    _tag_mpi_initialized_ = 1;
    return MORSE_SUCCESS;
}


/*
 * Fallback used when HAVE_STARPU_MPI_DATA_REGISTER is not defined: the owner
 * rank and the MPI tag are set on the handle through two separate calls.
 * NOTE(review): the end of this macro and its closing #endif are not visible
 * in this view.
 */
#ifndef HAVE_STARPU_MPI_DATA_REGISTER
#define starpu_mpi_data_register( handle_, tag_, owner_ )       \
    do {                                                        \
        starpu_data_set_rank( (handle_), (owner_) );            \
        starpu_data_set_tag( (handle_), (tag_) );               \
/**
 * Set the sizes used to build the MPI data tags.
 *
 * With CHAMELEON_USE_MPI, the values are forwarded to
 * chameleon_starpu_tag_init(); any return code other than MORSE_SUCCESS is
 * reported through morse_error(). Without MPI both parameters are unused.
 *
 * @param[in] user_tag_width  Requested total number of bits in a tag.
 * @param[in] user_tag_sep    Requested number of low-order bits of the tag.
 *
 * NOTE(review): the closing brace of this function is not visible in this view.
 */
void RUNTIME_comm_set_tag_sizes( int user_tag_width,
                                 int user_tag_sep )
{
THIBAULT Samuel's avatar
THIBAULT Samuel committed
#if defined(CHAMELEON_USE_MPI)
Mathieu Faverge's avatar
Mathieu Faverge committed
    int rc;
    rc = chameleon_starpu_tag_init( user_tag_width, user_tag_sep );
    if ( rc != MORSE_SUCCESS ) {
        morse_error("RUNTIME_user_tag_size",
                    "must be called before creating any Morse descriptor with MORSE_Desc_create(). The tag sizes will not be modified.");
    }
THIBAULT Samuel's avatar
THIBAULT Samuel committed
#endif
    (void)user_tag_width; (void)user_tag_sep;
 *  Malloc/Free of the data
/*
 * FOLDED is the extra flag OR-ed into the StarPU allocation flags:
 * STARPU_MALLOC_SIMULATION_FOLDED when that macro is defined, 0 otherwise.
 */
#if defined(STARPU_MALLOC_SIMULATION_FOLDED)
#  define FOLDED STARPU_MALLOC_SIMULATION_FOLDED
#else
#  define FOLDED 0
#endif
/**
 * Allocate size bytes of runtime-managed memory.
 *
 * When CHAMELEON_SIMULATION is defined and neither
 * STARPU_MALLOC_SIMULATION_FOLDED nor CHAMELEON_USE_MPI is, nothing is
 * allocated and the dummy pointer (void*)1 is returned. Otherwise the
 * allocation goes through starpu_malloc_flags() with the flags
 * STARPU_MALLOC_PINNED | FOLDED | STARPU_MALLOC_COUNT.
 *
 * @param[in] size  Number of bytes requested.
 *
 * NOTE(review): the declaration of ptr, the failure branch and the final
 * return are not visible in this view.
 */
void *RUNTIME_malloc( size_t size )
{
#if defined(CHAMELEON_SIMULATION) && !defined(STARPU_MALLOC_SIMULATION_FOLDED) && !defined(CHAMELEON_USE_MPI)
    return (void*) 1;
#else
    if (starpu_malloc_flags(&ptr, size, STARPU_MALLOC_PINNED|FOLDED|STARPU_MALLOC_COUNT) != 0) {
/**
 * Release memory through starpu_free_flags(), passing
 * STARPU_MALLOC_PINNED | FOLDED | STARPU_MALLOC_COUNT as flags.
 *
 * When CHAMELEON_SIMULATION is defined and neither
 * STARPU_MALLOC_SIMULATION_FOLDED nor CHAMELEON_USE_MPI is, the function
 * returns immediately.
 *
 * @param[in] ptr   Pointer forwarded to starpu_free_flags().
 * @param[in] size  Size in bytes forwarded to starpu_free_flags().
 *
 * NOTE(review): the #else / #endif lines and the closing brace are not visible
 * in this view.
 */
void RUNTIME_free( void  *ptr,
                   size_t size )
{
#if defined(CHAMELEON_SIMULATION) && !defined(STARPU_MALLOC_SIMULATION_FOLDED) && !defined(CHAMELEON_USE_MPI)
THIBAULT Samuel's avatar
THIBAULT Samuel committed
    return;
    starpu_free_flags(ptr, size, STARPU_MALLOC_PINNED|FOLDED|STARPU_MALLOC_COUNT);
 *  Create data descriptor
/**
 * Create the runtime-specific part of a descriptor.
 *
 * Steps visible in this function:
 *  - a zero-initialized array of lnt*lmt starpu_data_handle_t is allocated and
 *    stored in desc->schedopt (asserted non-NULL);
 *  - with CHAMELEON_USE_CUDA and without CHAMELEON_SIMULATION, when
 *    use_mat == 1 and register_mat == 1, desc->mat is registered with
 *    cudaHostRegister(); on failure register_mat is reset to 0 and a warning
 *    is emitted;
 *  - when desc->ooc is set, the byte size of each of the four tile shapes
 *    (full, last row, last column, corner) must be a multiple of
 *    getpagesize(), otherwise an error is reported and the function returns;
 *  - with CHAMELEON_USE_MPI, chameleon_starpu_tag_init() is called with the
 *    current tag_width / tag_sep, then the number of tiles is checked against
 *    2^tag_sep and desc->id against 2^(tag_width - tag_sep).
 *
 * @param[in,out] desc  Descriptor to initialize.
 *
 * NOTE(review): several lines of this function (comment openers, closing
 * braces, #endif directives) are not visible in this view.
 */
void RUNTIME_desc_create( MORSE_desc_t *desc )
{
    int64_t lmt = desc->lmt;
    int64_t lnt = desc->lnt;
    starpu_data_handle_t *tiles;
     * Allocate starpu_handle_t array (handlers are initialized on the fly when
     * discovered by any algorithm to save space)
     */
    desc->schedopt = (void*)calloc(lnt*lmt,sizeof(starpu_data_handle_t));
    assert(desc->schedopt);
    tiles = (starpu_data_handle_t*)(desc->schedopt);

#if defined(CHAMELEON_USE_CUDA) && !defined(CHAMELEON_SIMULATION)
    /*
     * Register allocated memory as CUDA pinned memory
     */
    if ( (desc->use_mat == 1) && (desc->register_mat == 1) )
    {
        int64_t eltsze = MORSE_Element_Size(desc->dtyp);
        size_t size = (size_t)(desc->llm) * (size_t)(desc->lln) * eltsze;
        cudaError_t rc;
        /* Register the matrix as pinned memory */
        rc = cudaHostRegister( desc->mat, size, cudaHostRegisterPortable );
        if ( rc != cudaSuccess )
        {
            /* Disable the unregister as register failed */
            desc->register_mat = 0;
            morse_warning("RUNTIME_desc_create(StarPU): cudaHostRegister - ", cudaGetErrorString( rc ));
    if (desc->ooc) {
        int     lastmm   = desc->lm - (desc->lmt-1) * desc->mb;
        int     lastnn   = desc->ln - (desc->lnt-1) * desc->nb;
        int64_t eltsze   = MORSE_Element_Size(desc->dtyp);
        int     pagesize = getpagesize();

        if ( ((desc->mb * desc->nb * eltsze) % pagesize != 0) ||
             ((lastmm   * desc->nb * eltsze) % pagesize != 0) ||
             ((desc->mb * lastnn   * eltsze) % pagesize != 0) ||
             ((lastmm   * lastnn   * eltsze) % pagesize != 0) )
        {
            morse_error("RUNTIME_desc_create", "Matrix and tile size not suitable for out-of-core: all tiles have to be multiples of 4096. Tip : choose 'n' and 'nb' as both multiples of 32.");
            return;
        }
    }
#if defined(CHAMELEON_USE_MPI)
    /*
     * Check that we are not going over MPI tag limitations
     */
    {
Mathieu Faverge's avatar
Mathieu Faverge committed
        chameleon_starpu_tag_init( tag_width, tag_sep );

        /* Check that we won't create overflow in tags used */
        if ( ((uintptr_t)(lnt*lmt)) > ((uintptr_t)(1UL<<tag_sep)) ) {
            morse_fatal_error("RUNTIME_desc_create", "Too many tiles in the descriptor for MPI tags");
        if ( ((uintptr_t)desc->id) >= (uintptr_t)(1UL<<(tag_width-tag_sep)) ) {
            morse_fatal_error("RUNTIME_desc_create", "Number of descriptor available in MPI mode out of stock");
        assert( ((uintptr_t)desc->id) < (uintptr_t)(1UL<<(tag_width-tag_sep)) );
 *  Destroy data descriptor
/**
 * Destroy the runtime-specific part of a descriptor.
 *
 * desc->occurences is decremented first. Only when it reaches 0 are the
 * handles stored in desc->schedopt walked and passed to
 * starpu_data_unregister(). With CHAMELEON_USE_CUDA and without
 * CHAMELEON_SIMULATION, when use_mat == 1 and register_mat == 1, desc->mat is
 * unregistered with cudaHostUnregister() and a warning is emitted on failure.
 *
 * @param[in,out] desc  Descriptor being destroyed.
 *
 * NOTE(review): the inner loop header, the condition under which a handle is
 * skipped, and the end of the function are not visible in this view.
 */
void RUNTIME_desc_destroy( MORSE_desc_t *desc )
{
    desc->occurences--;

    /*
     * If this is the last descriptor using the matrix, we release the handle
     */
    if ( desc->occurences == 0 ) {
        starpu_data_handle_t *handle = (starpu_data_handle_t*)(desc->schedopt);
        int lmt = desc->lmt;
        int lnt = desc->lnt;
        int m, n;

        for (n = 0; n < lnt; n++) {
                {
                    handle++;
                    continue;
                }
                starpu_data_unregister(*handle);
                handle++;
            }
#if defined(CHAMELEON_USE_CUDA) && !defined(CHAMELEON_SIMULATION)
        if ( (desc->use_mat == 1) && (desc->register_mat == 1) )
        {
            /* Unmap the pinned memory associated to the matrix */
            if (cudaHostUnregister(desc->mat) != cudaSuccess)
            {
                morse_warning("RUNTIME_desc_destroy(StarPU)",
                              "cudaHostUnregister failed to unregister the "
                              "pinned memory associated to the matrix");
#endif /* defined(CHAMELEON_USE_CUDA) */
/**
 * Acquire in read mode every registered local tile of a descriptor.
 *
 * The handle array stored in desc->schedopt is walked with n as the outer
 * index and m as the inner one. Entries that are NULL, or for which
 * morse_desc_islocal() is false, are skipped; the others are passed to
 * starpu_data_acquire() with STARPU_R.
 *
 * @param[in] desc  Descriptor whose tiles are acquired.
 *
 * @return MORSE_SUCCESS.
 *
 * NOTE(review): the opening and closing braces of this function are not
 * visible in this view.
 */
int RUNTIME_desc_acquire( const MORSE_desc_t *desc )
    starpu_data_handle_t *handle = (starpu_data_handle_t*)(desc->schedopt);
    int lmt = desc->lmt;
    int lnt = desc->lnt;
    int m, n;
    for (n = 0; n < lnt; n++) {
        for (m = 0; m < lmt; m++)
        {
            if ( (*handle == NULL) ||
                 !morse_desc_islocal( desc, m, n ) )
            {
                handle++;
                continue;
            }
            starpu_data_acquire(*handle, STARPU_R);
            handle++;
        }
    }
    return MORSE_SUCCESS;
/**
 * Release every registered local tile of a descriptor.
 *
 * The handle array stored in desc->schedopt is walked with n as the outer
 * index and m as the inner one. Entries that are NULL, or for which
 * morse_desc_islocal() is false, are skipped; the others are passed to
 * starpu_data_release().
 *
 * @param[in] desc  Descriptor whose tiles are released.
 *
 * NOTE(review): the end of the loops and the return statement are not visible
 * in this view.
 */
int RUNTIME_desc_release( const MORSE_desc_t *desc )
{
    starpu_data_handle_t *handle = (starpu_data_handle_t*)(desc->schedopt);
    int lmt = desc->lmt;
    int lnt = desc->lnt;
    int m, n;

    for (n = 0; n < lnt; n++) {
        for (m = 0; m < lmt; m++)
        {
            if ( (*handle == NULL) ||
                 !morse_desc_islocal( desc, m, n ) )
            {
                handle++;
                continue;
            }
            starpu_data_release(*handle);
 *  Flush cached data
/**
 * Flush the data cached by the runtime.
 *
 * With CHAMELEON_USE_MPI, all the data cached by StarPU-MPI on MPI_COMM_WORLD
 * are flushed through starpu_mpi_cache_flush_all_data(). Without MPI the
 * function does nothing.
 */
void RUNTIME_flush( void )
{
#if defined(CHAMELEON_USE_MPI)
    starpu_mpi_cache_flush_all_data(MPI_COMM_WORLD);
#endif
}

 * Different implementations of the flush call based on StarPU version
 */
#ifdef HAVE_STARPU_DATA_WONT_USE

/**
 * Variant selected when HAVE_STARPU_DATA_WONT_USE is defined: the handle is
 * forwarded unchanged to StarPU's own starpu_data_wont_use().
 *
 * @param[in] handle  StarPU data handle to pass on.
 */
static inline void
chameleon_starpu_data_wont_use( starpu_data_handle_t handle )
{
    starpu_data_wont_use( handle );
}

#elif defined HAVE_STARPU_IDLE_PREFETCH

static inline void
chameleon_starpu_data_flush( starpu_data_handle_t handle)
{
    starpu_data_idle_prefetch_on_node(handle, STARPU_MAIN_RAM, 1);
    starpu_data_release_on_node(handle, -1);
}

static inline void
chameleon_starpu_data_wont_use( starpu_data_handle_t handle ) {
    starpu_data_acquire_on_node_cb( handle, -1, STARPU_R,
                                    chameleon_starpu_data_flush, handle );
}

#else

static inline void
chameleon_starpu_data_wont_use( starpu_data_handle_t handle ) {
    starpu_data_acquire_cb( handle, STARPU_R,
                            (void (*)(void*))&starpu_data_release, handle );

#endif

/**
 * Flush the tiles of a descriptor.
 *
 * The handle array stored in desc->schedopt is walked with n as the outer
 * index and m as the inner one. In the visible code, with CHAMELEON_USE_MPI
 * the handle is passed to starpu_mpi_cache_flush() on MPI_COMM_WORLD, and
 * when morse_desc_islocal() is true it is passed to
 * chameleon_starpu_data_wont_use().
 *
 * @param[in] desc      Descriptor whose tiles are flushed.
 * @param[in] sequence  Not referenced in the visible part of the body.
 *
 * NOTE(review): the body of the NULL-handle branch and the end of the
 * function are not visible in this view.
 */
void RUNTIME_desc_flush( const MORSE_desc_t     *desc,
                         const MORSE_sequence_t *sequence )
{
    starpu_data_handle_t *handle = (starpu_data_handle_t*)(desc->schedopt);
    int lmt = desc->lmt;
    int lnt = desc->lnt;
    int m, n;

    for (n = 0; n < lnt; n++)
    {
        for (m = 0; m < lmt; m++, handle++)
            if ( *handle == NULL ) {
#if defined(CHAMELEON_USE_MPI)
            starpu_mpi_cache_flush( MPI_COMM_WORLD, *handle );
#endif
            if ( morse_desc_islocal( desc, m, n ) ) {
                chameleon_starpu_data_wont_use( *handle );
            }
/**
 * Flush a single tile A(m, n).
 *
 * The handle is looked up in A->schedopt at index lmt * nn + mm, where mm and
 * nn are m and n shifted by A->i / A->mb and A->j / A->nb. Nothing is done
 * when the handle is NULL. Otherwise, with CHAMELEON_USE_MPI the handle is
 * passed to starpu_mpi_cache_flush() on MPI_COMM_WORLD, and when
 * morse_desc_islocal() is true it is passed to
 * chameleon_starpu_data_wont_use().
 *
 * @param[in] sequence  Not referenced in the visible part of the body.
 * @param[in] A         Descriptor owning the tile.
 * @param[in] m         Row index of the tile in A.
 * @param[in] n         Column index of the tile in A.
 *
 * NOTE(review): the braces of this function are only partially visible in
 * this view.
 */
void RUNTIME_data_flush( const MORSE_sequence_t *sequence,
Mathieu Faverge's avatar
Mathieu Faverge committed
                         const MORSE_desc_t *A, int m, int n )
Mathieu Faverge's avatar
Mathieu Faverge committed
    int64_t mm = m + (A->i / A->mb);
    int64_t nn = n + (A->j / A->nb);
Mathieu Faverge's avatar
Mathieu Faverge committed
    starpu_data_handle_t *handle = A->schedopt;
    handle += ((int64_t)A->lmt) * nn + mm;
    if (*handle == NULL) {
        return;
#if defined(CHAMELEON_USE_MPI)
    starpu_mpi_cache_flush( MPI_COMM_WORLD, *handle );
#endif
Mathieu Faverge's avatar
Mathieu Faverge committed
    if ( morse_desc_islocal( A, m, n ) ) {
        chameleon_starpu_data_wont_use( *handle );
#if defined(CHAMELEON_USE_MIGRATE)
void RUNTIME_data_migrate( const MORSE_sequence_t *sequence,
                           const MORSE_desc_t *A, int Am, int An, int new_rank )
{
    starpu_data_handle_t *handle = (starpu_data_handle_t*)(A->schedopt);
    starpu_data_handle_t lhandle;
    handle += ((int64_t)(A->lmt) * (int64_t)An + (int64_t)Am);

    lhandle = *handle;
    if ( lhandle == NULL ) {
        /* Register the data */
        lhandle = RUNTIME_data_getaddr( A, Am, An );
    }

    starpu_mpi_data_migrate( MPI_COMM_WORLD, lhandle, new_rank );

    (void)sequence;
}
#endif

/*
 * Older revisions of StarPU do not define STARPU_MAIN_RAM:
 * fall back to 0 in that case.
 */
#if !defined(STARPU_MAIN_RAM)
#  define STARPU_MAIN_RAM 0
#endif

Mathieu Faverge's avatar
Mathieu Faverge committed
/**
 * Return the StarPU handle associated with the tile A(m, n).
 *
 * The handle slot is located in A->schedopt at index lmt * nn + mm, where mm
 * and nn are m and n shifted by A->i / A->mb and A->j / A->nb. The visible
 * code registers the tile with starpu_matrix_data_register() using
 * BLKLDD(A, m) as leading dimension, tempmm x tempnn as dimensions (reduced
 * on the last tile row / column) and the element size of A->dtyp; the home
 * node is STARPU_MAIN_RAM when A->get_blkaddr() returns a non-NULL pointer
 * and -1 otherwise. When available, the tile coordinates are attached with
 * starpu_data_set_coordinates(), and with CHAMELEON_USE_MPI the handle is
 * registered with the tag (A->id << tag_sep) | block_ind and its owner rank.
 *
 * @param[in] A  Descriptor owning the tile.
 * @param[in] m  Row index of the tile in A.
 * @param[in] n  Column index of the tile in A.
 *
 * @return The content of the handle slot (*ptrtile).
 *
 * NOTE(review): the conditions guarding the registration, several braces and
 * #endif directives are not visible in this view.
 */
void *RUNTIME_data_getaddr( const MORSE_desc_t *A, int m, int n )
Mathieu Faverge's avatar
Mathieu Faverge committed
    int64_t mm = m + (A->i / A->mb);
    int64_t nn = n + (A->j / A->nb);
Mathieu Faverge's avatar
Mathieu Faverge committed
    starpu_data_handle_t *ptrtile = A->schedopt;
    ptrtile += ((int64_t)A->lmt) * nn + mm;
        int home_node = -1;
        void *user_ptr = NULL;
Mathieu Faverge's avatar
Mathieu Faverge committed
        int myrank = A->myrank;
        int owner  = A->get_rankof( A, m, n );
        int64_t eltsze = MORSE_Element_Size(A->dtyp);
        int tempmm = (mm == A->lmt-1) ? (A->lm - mm * A->mb) : A->mb;
        int tempnn = (nn == A->lnt-1) ? (A->ln - nn * A->nb) : A->nb;
Mathieu Faverge's avatar
Mathieu Faverge committed
            user_ptr = A->get_blkaddr(A, m, n);
            if ( user_ptr != NULL ) {
                home_node = STARPU_MAIN_RAM;
        starpu_matrix_data_register( ptrtile, home_node, (uintptr_t) user_ptr,
Mathieu Faverge's avatar
Mathieu Faverge committed
                                     BLKLDD(A, m),
                                     tempmm, tempnn, eltsze );
#ifdef HAVE_STARPU_DATA_SET_COORDINATES
        starpu_data_set_coordinates( *ptrtile, 2, m, n );
#if defined(CHAMELEON_USE_MPI)
Mathieu Faverge's avatar
Mathieu Faverge committed
            int64_t block_ind = A->lmt * nn + mm;
            starpu_mpi_data_register(*ptrtile, (A->id << tag_sep) | (block_ind), owner);
#endif /* defined(CHAMELEON_USE_MPI) */
    return *ptrtile;