chameleon_starpu.h.in 5.51 KB
Newer Older
1
/**
 *
 * @file chameleon_starpu.h
 *
 * @copyright 2009-2014 The University of Tennessee and The University of
 *                      Tennessee Research Foundation. All rights reserved.
 * @copyright 2012-2016 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
 *                      Univ. Bordeaux. All rights reserved.
 *
 ***
 *
 * @brief Chameleon StarPU runtime header
 *
 * @version 1.0.0
 * @author Mathieu Faverge
 * @author Cedric Castagnede
 * @author Florent Pruvost
 * @date 2011-06-01
 *
 */
21 22 23
#ifndef _MORSE_STARPU_H_
#define _MORSE_STARPU_H_

24
#include "chameleon/chameleon_config.h"
PRUVOST Florent's avatar
@all  
PRUVOST Florent committed
25

26 27 28 29 30 31 32
/*
 * StarPU options
 *
 * This file is a CMake template: each #cmakedefine line below is rewritten at
 * configure time into either a #define or a commented-out #undef of the same
 * name, depending on whether the corresponding CMake variable is set.
 */
#cmakedefine HAVE_STARPU_FXT_PROFILING
#cmakedefine HAVE_STARPU_IDLE_PREFETCH
#cmakedefine HAVE_STARPU_ITERATION_PUSH
#cmakedefine HAVE_STARPU_DATA_WONT_USE
#cmakedefine HAVE_STARPU_DATA_SET_COORDINATES
#cmakedefine HAVE_STARPU_MALLOC_ON_NODE_SET_DEFAULT_FLAGS
#cmakedefine HAVE_STARPU_MPI_DATA_MIGRATE
#cmakedefine HAVE_STARPU_MPI_DATA_REGISTER
#cmakedefine HAVE_STARPU_MPI_COMM_RANK
#cmakedefine HAVE_STARPU_MPI_CACHED_RECEIVE
#cmakedefine HAVE_STARPU_MPI_COMM_GET_ATTR
38

39
#if defined(CHAMELEON_USE_MPI)
40 41 42 43 44 45 46
#include <starpu_mpi.h>
#else
#include <starpu.h>
#endif

#include <starpu_profiling.h>

47
#if defined(CHAMELEON_USE_CUDA) && !defined(CHAMELEON_SIMULATION)
48 49
#include <starpu_scheduler.h>
#include <starpu_cuda.h>
50 51 52 53 54 55 56

#include <cublas.h>
#include <starpu_cublas.h>
#if defined(CHAMELEON_USE_CUBLAS_V2)
#include <cublas_v2.h>
#include <starpu_cublas_v2.h>
#endif
57 58
#endif

59 60 61 62 63 64 65 66 67 68
/*
 * Consistency check between Chameleon's simulation mode and StarPU's SimGrid
 * support: requesting simulation on a StarPU that lacks SimGrid is a hard
 * error, while a regular build on a SimGrid-enabled StarPU only warns.
 */
#if defined(CHAMELEON_SIMULATION) && !defined(STARPU_SIMGRID)
# error "Starpu was not built with simgrid support (--enable-simgrid). Can not run Chameleon with simulation support."
#elif !defined(CHAMELEON_SIMULATION) && defined(STARPU_SIMGRID)
# warning "Starpu was built with simgrid support. Better build Chameleon with simulation support (-DCHAMELEON_SIMULATION=YES)."
#endif

69
#include "control/common.h"
70 71 72 73
#include "runtime_codelets.h"
#include "runtime_profiling.h"
#include "runtime_codelet_profile.h"
#include "runtime_workspace.h"
74 75 76

/* Shorthand alias for struct starpu_conf. */
typedef struct starpu_conf starpu_conf_t;

77
/**/
78 79 80 81

/*
 * MPI Redefinitions
 */
82
#if defined(CHAMELEON_USE_MPI)
/*
 * MPI build: STARPU_REDUX is undefined, task insertion is redirected to
 * starpu_mpi_insert_task, and starpu_mpi_codelet() prepends MPI_COMM_WORLD to
 * the codelet so the pair can be used as the leading arguments of the call.
 */
#undef STARPU_REDUX
#define starpu_insert_task starpu_mpi_insert_task
#define starpu_mpi_codelet(_codelet_) MPI_COMM_WORLD, _codelet_

#else

/* Non-MPI build: starpu_mpi_codelet() passes the codelet through unchanged. */
#define starpu_mpi_codelet(_codelet_) _codelet_

#endif

94 95 96 97 98 99 100 101 102 103 104 105 106
/*
 * cuBlasAPI v2 - StarPU enable the support for cublas handle
 */
#if defined(CHAMELEON_USE_CUDA) && defined(CHAMELEON_USE_CUBLAS_V2)
/*
 * cuBLAS v2 variant: declares a cublasHandle_t named _stream_, initialised
 * with the value returned by starpu_cublas_get_local_handle().
 */
#define RUNTIME_getStream(_stream_)                             \
    cublasHandle_t _stream_ = starpu_cublas_get_local_handle();
#else
/*
 * Default variant: declares a cudaStream_t named _stream_, initialised with
 * the value returned by starpu_cuda_get_local_stream(), and passes it to
 * cublasSetKernelStream().  The macro parameter is used in both statements,
 * so the caller is free to pick any variable name.
 */
#define RUNTIME_getStream(_stream_)                             \
    cudaStream_t _stream_ = starpu_cuda_get_local_stream();     \
    cublasSetKernelStream( _stream_ );

#endif

107
/*
 * Codelet names are enabled when the StarPU version is 1.2 or newer
 * (any major version above 1, or major 1 with a minor version above 1).
 */
#if (STARPU_MAJOR_VERSION >= 2) || ((STARPU_MAJOR_VERSION == 1) && (STARPU_MINOR_VERSION >= 2))
#  define CHAMELEON_CODELETS_HAVE_NAME
#endif

/**
115 116
 * Access to block pointer and leading dimension
 */
117
/*
 * Yields the result of RUNTIME_data_getaddr() for tile (m, n) of desc, cast to
 * starpu_data_handle_t.  The type argument is accepted but not used.
 */
#define RTBLKADDR( desc, type, m, n )                                   \
    ( (starpu_data_handle_t)RUNTIME_data_getaddr( (desc), (m), (n) ) )
118 119

/*
 * Declaration only; the definition is not part of this header.
 * NOTE(review): presumably attaches reduction methods matching the data type
 * dtyp to the given StarPU handle - confirm against the implementation.
 */
void RUNTIME_set_reduction_methods(starpu_data_handle_t handle, MORSE_enum dtyp);
120

121 122
#if defined(CHAMELEON_USE_MPI) && defined(HAVE_STARPU_MPI_CACHED_RECEIVE)
/**
 * Query starpu_mpi_cached_receive() for the handle attached to tile (m, n)
 * of A.
 *
 * The tile indices are first shifted by the descriptor offsets
 * (A->i / A->mb and A->j / A->nb), then used to index the handle array stored
 * in A->schedopt with a leading dimension of A->lmt.
 *
 * @param[in] A  Descriptor whose schedopt field holds the tile handles.
 * @param[in] m  Row index of the tile, before the offset is applied.
 * @param[in] n  Column index of the tile, before the offset is applied.
 *
 * @return 0 when the handle slot of the tile is empty, otherwise the value
 *         returned by starpu_mpi_cached_receive() for that handle.
 */
static inline int
chameleon_starpu_data_iscached(const MORSE_desc_t *A, int m, int n)
{
    int64_t mm = m + (A->i / A->mb);
    int64_t nn = n + (A->j / A->nb);

    starpu_data_handle_t *ptrtile = A->schedopt;
    ptrtile += ((int64_t)A->lmt) * nn + mm;

    /* No handle in this slot: nothing can be cached for the tile. */
    if (!(*ptrtile)) {
        return 0;
    }

    return starpu_mpi_cached_receive(*ptrtile);
}

/*
 * Sets __morse_need_submit to 1 when chameleon_starpu_data_iscached() returns
 * non-zero for the tile.  __morse_need_submit must be in scope at the point
 * of use.
 */
#define RUNTIME_ACCESS_WRITE_CACHED(A, Am, An) do {                 \
        if (chameleon_starpu_data_iscached(A, Am, An)) __morse_need_submit = 1; } while(0)

#else

/*
 * The missing-feature warning only concerns MPI builds; without MPI there is
 * no cached receive to track, so stay silent.
 */
#if defined(CHAMELEON_USE_MPI)
#warning "WAR dependencies need starpu_mpi_cached_receive support from StarPU 1.2.1 or greater"
#endif
#define RUNTIME_ACCESS_WRITE_CACHED(A, Am, An) do {} while (0)

#endif
146

147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170
#ifdef CHAMELEON_ENABLE_PRUNING_STATS

/* Declares the two per-task flags used by the other pruning-stats macros. */
#define RUNTIME_PRUNING_STATS_BEGIN_ACCESS_DECLARATION \
    int __morse_exec = 0; \
    int __morse_changed = 0;

/* Sets __morse_exec when morse_desc_islocal() is true for the written tile. */
#define RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An) \
    if (morse_desc_islocal(A, Am, An)) \
        __morse_exec = 1;

/*
 * Increments RUNTIME_total_tasks, then at most one of the exec / comm /
 * changed counters, tested in that order.  __morse_need_submit must be in
 * scope at the point of use.
 */
#define RUNTIME_PRUNING_STATS_END_ACCESS_DECLARATION \
    RUNTIME_total_tasks++; \
    if (__morse_exec) \
        RUNTIME_exec_tasks++; \
    else if (__morse_need_submit) \
        RUNTIME_comm_tasks++; \
    else if (__morse_changed) \
        RUNTIME_changed_tasks++;

/*
 * Overrides __morse_exec with the comparison between rank and the rank
 * obtained from RUNTIME_comm_rank(), and sets __morse_changed.  The last line
 * carries no continuation backslash, so the macro ends here regardless of
 * what follows it in the file.
 */
#define RUNTIME_PRUNING_STATS_RANK_CHANGED(rank) \
    int __morse_myrank; \
    RUNTIME_comm_rank(&__morse_myrank); \
    __morse_exec = (rank) == __morse_myrank; \
    __morse_changed = 1;

#else

/* Pruning statistics disabled: every macro expands to nothing. */
#define RUNTIME_PRUNING_STATS_BEGIN_ACCESS_DECLARATION
#define RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An)
#define RUNTIME_PRUNING_STATS_END_ACCESS_DECLARATION
#define RUNTIME_PRUNING_STATS_RANK_CHANGED(rank)

#endif

179
/* Opens an access declaration; forwards to the pruning-stats variant. */
#define RUNTIME_BEGIN_ACCESS_DECLARATION        \
    RUNTIME_PRUNING_STATS_BEGIN_ACCESS_DECLARATION

182
/* Read-access declaration: expands to nothing. */
#define RUNTIME_ACCESS_R(A, Am, An)
183

184 185 186
/*
 * Write-access declaration for tile (Am, An) of A: runs the pruning-stats
 * hook, then the cached-receive hook.  Both are wrapped in one
 * do { } while (0) so the macro is a single statement and stays correct under
 * an unbraced if/else.  The caller supplies the trailing semicolon.
 */
#define RUNTIME_ACCESS_W(A, Am, An)                     \
    do {                                                \
        RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An);      \
        RUNTIME_ACCESS_WRITE_CACHED(A, Am, An);         \
    } while (0)
187

188 189 190
/*
 * Read-write access declaration for tile (Am, An) of A: runs the pruning-stats
 * hook, then the cached-receive hook.  Both are wrapped in one
 * do { } while (0) so the macro is a single statement and stays correct under
 * an unbraced if/else.  The caller supplies the trailing semicolon.
 */
#define RUNTIME_ACCESS_RW(A, Am, An)                    \
    do {                                                \
        RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An);      \
        RUNTIME_ACCESS_WRITE_CACHED(A, Am, An);         \
    } while (0)
191

192
/* Forwards the rank argument to the pruning-stats variant. */
#define RUNTIME_RANK_CHANGED(rank)              \
    RUNTIME_PRUNING_STATS_RANK_CHANGED(rank)
194

195
/*
 * Closes an access declaration; forwards to the pruning-stats variant.  The
 * trailing semicolon is part of the expansion.
 */
#define RUNTIME_END_ACCESS_DECLARATION          \
    RUNTIME_PRUNING_STATS_END_ACCESS_DECLARATION;
197

198
#endif /* _MORSE_STARPU_H_ */