/**
 *
 * @copyright (c) 2009-2014 The University of Tennessee and The University
 *                          of Tennessee Research Foundation.
 * @copyright (c) 2012-2016 Inria. All rights reserved.
 * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
 *
 **/

/**
 *
 * @file morse_starpu.h
 *
 *  MORSE codelets kernel
 *  MORSE is a software package provided by Univ. of Tennessee,
 *  Univ. of California Berkeley and Univ. of Colorado Denver,
 *  and INRIA Bordeaux Sud-Ouest
 *
 * @version 0.9.0
 * @author Mathieu Faverge
 * @author Cedric Castagnede
 * @date 2011-06-01
 *
 **/

/******************************************************************************/

/*
 *  MORSE facilities of interest to both src and magmablas directories
 **/
#ifndef _MORSE_STARPU_H_
#define _MORSE_STARPU_H_

#include "include/chameleon_config.h"

/* StarPU front end: the MPI flavour when CHAMELEON_USE_MPI is defined. */
#if defined(CHAMELEON_USE_MPI)
#include <starpu_mpi.h>
#else
#include <starpu.h>
#endif

#include <starpu_profiling.h>

/* CUDA / cuBLAS headers, only for CUDA builds that are not simulated. */
#if defined(CHAMELEON_USE_CUDA) && !defined(CHAMELEON_SIMULATION)
#include <starpu_scheduler.h>
#include <starpu_cuda.h>

#include <cublas.h>
#include <starpu_cublas.h>
#if defined(CHAMELEON_USE_CUBLAS_V2)
#include <cublas_v2.h>
#include <starpu_cublas_v2.h>
#endif
#endif /* CHAMELEON_USE_CUDA && !CHAMELEON_SIMULATION */

/* Runtime headers are included for every configuration, CUDA or not. */
#include "runtime/starpu/include/runtime_codelets.h"
#include "runtime/starpu/include/runtime_profiling.h"
#include "runtime/starpu/include/runtime_codelet_profile.h"
#include "runtime/starpu/include/runtime_workspace.h"

/* Short alias for struct starpu_conf. */
typedef struct starpu_conf starpu_conf_t;

/******************************************************************************/

/*
 * MPI Redefinitions
 *
 * When CHAMELEON_USE_MPI is defined, starpu_insert_task is aliased to
 * starpu_mpi_insert_task and starpu_mpi_codelet(cl) expands to
 * "MPI_COMM_WORLD, cl", so the communicator precedes the codelet in the
 * argument list. Otherwise starpu_mpi_codelet(cl) expands to "cl" alone.
 */
#if defined(CHAMELEON_USE_MPI)

#define starpu_insert_task starpu_mpi_insert_task
#define starpu_mpi_codelet(_codelet_) MPI_COMM_WORLD, _codelet_

#else

#define starpu_mpi_codelet(_codelet_) _codelet_

#endif /* CHAMELEON_USE_MPI */
/*
 * cuBLAS stream / handle access
 *
 * RUNTIME_getStream(_stream_) declares a variable named _stream_ in the
 * scope where it is expanded:
 *  - with CHAMELEON_USE_CUDA and CHAMELEON_USE_CUBLAS_V2, a cublasHandle_t
 *    initialised from starpu_cublas_get_local_handle();
 *  - otherwise, a cudaStream_t initialised from
 *    starpu_cuda_get_local_stream(), which is then handed to
 *    cublasSetKernelStream().
 *
 * Both expansions already end with ';'.
 */
#if defined(CHAMELEON_USE_CUDA) && defined(CHAMELEON_USE_CUBLAS_V2)
#define RUNTIME_getStream(_stream_)                             \
    cublasHandle_t _stream_ = starpu_cublas_get_local_handle();
#else
/* Pass the macro argument itself, so the macro works for any variable name
 * and not only when the caller happens to call it "stream". */
#define RUNTIME_getStream(_stream_)                             \
    cudaStream_t _stream_ = starpu_cuda_get_local_stream();     \
    cublasSetKernelStream( _stream_ );

#endif

 * Enable codelets names
 */
#if (STARPU_MAJOR_VERSION > 1) || ((STARPU_MAJOR_VERSION == 1) && (STARPU_MINOR_VERSION > 1))
#define CHAMELEON_CODELETS_HAVE_NAME
#endif

/**
 * Block handle lookup.
 *
 * RTBLKADDR(desc, type, m, n) forwards desc, m and n to
 * RUNTIME_desc_getaddr() and casts the result to starpu_data_handle_t.
 * The type argument is accepted but does not appear in the expansion.
 */
#define RTBLKADDR( desc, type, m, n )                               \
    ( (starpu_data_handle_t)RUNTIME_desc_getaddr( desc, m, n ) )

/*
 * Runtime helper prototypes.
 *
 * RUNTIME_desc_iscached() is declared only when both CHAMELEON_USE_MPI and
 * HAVE_STARPU_MPI_CACHED_RECEIVE are defined.
 */
void RUNTIME_set_reduction_methods( starpu_data_handle_t handle, MORSE_enum dtyp );

#if defined(CHAMELEON_USE_MPI) && defined(HAVE_STARPU_MPI_CACHED_RECEIVE)
int RUNTIME_desc_iscached( const MORSE_desc_t *A, int Am, int An );
#endif

/*
 * RUNTIME_ACCESS_WRITE_CACHED(A, Am, An)
 *
 *  - no MPI: expands to nothing;
 *  - MPI with HAVE_STARPU_MPI_CACHED_RECEIVE: sets __morse_need_submit to 1
 *    when RUNTIME_desc_iscached(A, Am, An) is non-zero (__morse_need_submit
 *    must be visible at the expansion site; it is not declared here);
 *  - MPI without HAVE_STARPU_MPI_CACHED_RECEIVE: emits a build warning and
 *    expands to nothing.
 */
#if !defined(CHAMELEON_USE_MPI)
#  define RUNTIME_ACCESS_WRITE_CACHED(A, Am, An)
#elif defined(HAVE_STARPU_MPI_CACHED_RECEIVE)
#  define RUNTIME_ACCESS_WRITE_CACHED(A, Am, An)        \
    do {                                                \
        if (RUNTIME_desc_iscached(A, Am, An))           \
            __morse_need_submit = 1;                    \
    } while(0)
#else
#  warning "WAR dependencies need starpu_mpi_cached_receive support from StarPU 1.2.1 or greater"
#  define RUNTIME_ACCESS_WRITE_CACHED(A, Am, An)
#endif
/*
 * Pruning statistics helpers
 *
 * With CHAMELEON_ENABLE_PRUNING_STATS defined:
 *  - ..._BEGIN_ACCESS_DECLARATION declares __morse_exec and __morse_changed,
 *    both initialised to 0;
 *  - ..._ACCESS_W(A, Am, An) sets __morse_exec to 1 when
 *    morse_desc_islocal(A, Am, An) is non-zero;
 *  - ..._RANK_CHANGED(rank) declares __morse_myrank, fills it through
 *    RUNTIME_comm_rank(), sets __morse_exec to (rank) == __morse_myrank and
 *    sets __morse_changed to 1;
 *  - ..._END_ACCESS_DECLARATION increments RUNTIME_total_tasks, then at most
 *    one of RUNTIME_exec_tasks, RUNTIME_comm_tasks or RUNTIME_changed_tasks,
 *    tested in that order against __morse_exec, __morse_need_submit and
 *    __morse_changed.
 *
 * __morse_need_submit and the RUNTIME_*_tasks counters are not declared in
 * this block; they must be visible where the macros are expanded.
 *
 * Without CHAMELEON_ENABLE_PRUNING_STATS every macro expands to nothing.
 *
 * These macros expand to bare declarations/statements rather than a single
 * expression, so they must be used where a statement sequence is allowed.
 */
#ifdef CHAMELEON_ENABLE_PRUNING_STATS

#define RUNTIME_PRUNING_STATS_BEGIN_ACCESS_DECLARATION \
    int __morse_exec = 0; \
    int __morse_changed = 0;

#define RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An) \
    if (morse_desc_islocal(A, Am, An)) \
        __morse_exec = 1;

#define RUNTIME_PRUNING_STATS_END_ACCESS_DECLARATION \
    RUNTIME_total_tasks++; \
    if (__morse_exec) \
        RUNTIME_exec_tasks++; \
    else if (__morse_need_submit) \
        RUNTIME_comm_tasks++; \
    else if (__morse_changed) \
        RUNTIME_changed_tasks++;

/* The last line carries no continuation backslash: a trailing '\' here would
 * splice the following #else into the macro body. */
#define RUNTIME_PRUNING_STATS_RANK_CHANGED(rank) \
    int __morse_myrank; \
    RUNTIME_comm_rank(&__morse_myrank); \
    __morse_exec = (rank) == __morse_myrank; \
    __morse_changed = 1;

#else

#define RUNTIME_PRUNING_STATS_BEGIN_ACCESS_DECLARATION
#define RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An)
#define RUNTIME_PRUNING_STATS_END_ACCESS_DECLARATION
#define RUNTIME_PRUNING_STATS_RANK_CHANGED(rank)

#endif /* CHAMELEON_ENABLE_PRUNING_STATS */

/*
 * Data-access declaration macros.
 *
 * Thin wrappers over the RUNTIME_PRUNING_STATS_* helpers and
 * RUNTIME_ACCESS_WRITE_CACHED:
 *  - RUNTIME_BEGIN_ACCESS_DECLARATION / RUNTIME_END_ACCESS_DECLARATION
 *    forward to the matching pruning-stats macro (END appends a ';');
 *  - RUNTIME_ACCESS_R expands to nothing;
 *  - RUNTIME_ACCESS_W and RUNTIME_ACCESS_RW expand identically: the
 *    pruning-stats write hook followed by the write-cached hook;
 *  - RUNTIME_RANK_CHANGED forwards to the pruning-stats rank hook.
 */
#define RUNTIME_BEGIN_ACCESS_DECLARATION \
    RUNTIME_PRUNING_STATS_BEGIN_ACCESS_DECLARATION

#define RUNTIME_END_ACCESS_DECLARATION \
    RUNTIME_PRUNING_STATS_END_ACCESS_DECLARATION;

#define RUNTIME_RANK_CHANGED(rank) \
    RUNTIME_PRUNING_STATS_RANK_CHANGED(rank)

#define RUNTIME_ACCESS_R(A, Am, An)

#define RUNTIME_ACCESS_W(A, Am, An) \
    RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An); \
    RUNTIME_ACCESS_WRITE_CACHED(A, Am, An)

/* Read-write access is handled exactly like a write access. */
#define RUNTIME_ACCESS_RW(A, Am, An) \
    RUNTIME_ACCESS_W(A, Am, An)