/**
 *
 * @file timing.h
 *
 * @copyright 2009-2014 The University of Tennessee and The University of
 *                      Tennessee Research Foundation. All rights reserved.
 * @copyright 2012-2015 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
 *                      Univ. Bordeaux. All rights reserved.
 *
 * @version 1.0.0
 *
 **/
#ifndef TIMING_H
#define TIMING_H

/* Scalar type for time values: a plain alias of double. */
typedef double morse_time_t;

/**
 * Indices of the integer parameters of a timing run.
 *
 * The macros of this header read these slots from an array named `iparam`
 * (for example iparam[IPARAM_M]). Enumerators are numbered implicitly from 0,
 * so their order is significant; IPARAM_SIZEOF is the last one and therefore
 * equals the number of parameter slots.
 */
enum iparam_timing {
    IPARAM_THRDNBR,        /* Number of cores                            */
    IPARAM_THRDNBR_SUBGRP, /* Number of cores in a subgroup (NUMA node)  */
    IPARAM_SCHEDULER,      /* What scheduler do we choose (dyn, stat)    */
    IPARAM_M,              /* Number of rows of the matrix               */
    IPARAM_N,              /* Number of columns of the matrix            */
    IPARAM_K,              /* RHS or K                                   */
    IPARAM_LDA,            /* Leading dimension of A                     */
    IPARAM_LDB,            /* Leading dimension of B                     */
    IPARAM_LDC,            /* Leading dimension of C                     */
    IPARAM_IB,             /* Inner-blocking size                        */
    IPARAM_NB,             /* Number of columns in a tile                */
    IPARAM_MB,             /* Number of rows in a tile                   */
    IPARAM_NITER,          /* Number of iterations of each test          */
    IPARAM_WARMUP,         /* Run one test to load dynamic libraries     */
    IPARAM_BIGMAT,         /* Allocating one big mat or plenty of small  */
    IPARAM_CHECK,          /* Checking activated or not                  */
    IPARAM_VERBOSE,        /* How much noise do we want?                 */
    IPARAM_AUTOTUNING,     /* Disable/enable autotuning                  */
    IPARAM_INPUTFMT,       /* Input format (Use only for getmi/gecfi)    */
    IPARAM_OUTPUTFMT,      /* Output format (Use only for getmi/gecfi)   */
    IPARAM_TRACE,          /* Generate trace on the first non warmup run */
    IPARAM_DAG,            /* Do we require to output the DOT file?      */
    IPARAM_ASYNC,          /* Asynchronous calls                         */
    IPARAM_OOC,            /* Out of Core                                */
    IPARAM_MX,             /* (undocumented)                             */
    IPARAM_NX,             /* (undocumented)                             */
    IPARAM_RHBLK,          /* Householder reduction parameter for QR/LQ  */
    IPARAM_INPLACE,        /* InPlace/OutOfPlace translation mode        */
    IPARAM_MODE,           /* Eigenvalue generation mode                 */

    IPARAM_INVERSE,
    IPARAM_NCUDAS,
    IPARAM_NMPI,
    IPARAM_P,              /* Parameter for 2D cyclic distribution       */
    IPARAM_Q,              /* Parameter for 2D cyclic distribution       */

    IPARAM_PROGRESS,       /* Use a progress indicator during computations  */
    IPARAM_GEMM3M,         /* Use GEMM3M for complex matrix vector products */
    /* Added for StarPU version */
    IPARAM_PROFILE,
    IPARAM_PRINT_WARNINGS,
    IPARAM_PEAK,
    IPARAM_PARALLEL_TASKS,
    IPARAM_NO_CPU,
    IPARAM_BOUND,
    /* End */
    /* Added for libhqr version */
    IPARAM_LOWLVL_TREE,    /* Tree used for reduction inside nodes        */
    IPARAM_HIGHLVL_TREE,   /* Tree used for reduction between nodes       */
    IPARAM_QR_TS_SZE,      /* Size of TS domain                           */
    IPARAM_QR_HLVL_SZE,    /* Size of the high level tree                 */
    IPARAM_QR_DOMINO,      /* Enable/disable the domino tree              */
    IPARAM_QR_TSRR,        /* Enable/disable the round-robin on TS domain */
    /* End */
    IPARAM_SIZEOF          /* Number of integer parameters (keep last)    */
};

/*
 * Slots of the floating-point timing parameters, numbered implicitly from 0.
 *
 * The enumerators keep the IPARAM_ prefix even though they belong to
 * dparam_timing. IPARAM_DNBPARAM is the last enumerator and therefore equals
 * the number of slots.
 * NOTE(review): presumably these index a double-precision parameter array;
 * no use is visible in this header - confirm against the callers.
 */
enum dparam_timing {
    IPARAM_TIME,
    IPARAM_ANORM,
    IPARAM_BNORM,
    IPARAM_XNORM,
    IPARAM_RNORM,
    IPARAM_AinvNORM,
    IPARAM_ESTIMATED_PEAK,
    IPARAM_RES,
    /* Begin section for hydra integration tool */
    /* Maximum value accepted for: ||Ax-b||/N/eps/(||A||||x||+||b||) */
    IPARAM_THRESHOLD_CHECK,
    /* End section for hydra integration tool */
    IPARAM_DNBPARAM
};

/**
 * Declare, in the current scope, the locals used by the other macros of this
 * header, initialised from the integer parameter array.
 *
 * @param iparam  Array indexed by the iparam_timing enumerators.
 *
 * Declares: t (double, left uninitialised; START_TIMING assigns it), M, N, K,
 * NRHS (copy of K), LDA/LDB/LDC (chameleon_max of M/N/K and the matching
 * IPARAM_LD* entry), IB, MB, NB, P, Q, MT and NT (M/MB and N/NB rounded up),
 * and the int flags bigmat, ooc, check and loud.
 *
 * MB and NB must be non-zero: they are used as divisors for MT and NT.
 * The (void) casts only silence unused-variable warnings.
 * Requires int64_t and chameleon_max to be visible at the expansion site.
 */
#define PASTE_CODE_IPARAM_LOCALS(iparam)                           \
    double  t;                                                     \
    int64_t M     = (iparam)[IPARAM_M];                            \
    int64_t N     = (iparam)[IPARAM_N];                            \
    int64_t K     = (iparam)[IPARAM_K];                            \
    int64_t NRHS  = K;                                             \
    int64_t LDA   = chameleon_max(M, (iparam)[IPARAM_LDA]);        \
    int64_t LDB   = chameleon_max(N, (iparam)[IPARAM_LDB]);        \
    int64_t LDC   = chameleon_max(K, (iparam)[IPARAM_LDC]);        \
    int64_t IB    = (iparam)[IPARAM_IB];                           \
    int64_t MB    = (iparam)[IPARAM_MB];                           \
    int64_t NB    = (iparam)[IPARAM_NB];                           \
    int64_t P     = (iparam)[IPARAM_P];                            \
    int64_t Q     = (iparam)[IPARAM_Q];                            \
    int64_t MT    = (M%MB==0) ? (M/MB) : (M/MB+1);                 \
    int64_t NT    = (N%NB==0) ? (N/NB) : (N/NB+1);                 \
    int bigmat    = (iparam)[IPARAM_BIGMAT];                       \
    int ooc       = (iparam)[IPARAM_OOC];                          \
    int check     = (iparam)[IPARAM_CHECK];                        \
    int loud      = (iparam)[IPARAM_VERBOSE];                      \
    (void)M;(void)N;(void)K;(void)NRHS;                            \
    (void)LDA;(void)LDB;(void)LDC;                                 \
    (void)IB;(void)MB;(void)NB;(void)P;(void)Q;                    \
    (void)MT;(void)NT;(void)check;                                 \
    (void)loud;(void)bigmat;(void)ooc;

/**
 * Declare a descriptor pointer named _desc_ and, if _cond_ is true, create it.
 *
 * @param _desc_   Name of the MORSE_desc_t pointer to declare (starts NULL).
 * @param _cond_   The descriptor is created only when this is true.
 * @param _type_   Unused by the expansion; kept for the existing call sites.
 * @param _type2_  Type argument forwarded to the creation routine.
 * @param _lda_    Forwarded to the creation routine together with _n_.
 * @param _m_      Forwarded to the creation routine together with _n_.
 * @param _n_      Forwarded to the creation routine (used twice).
 *
 * Also declares `int status<_desc_>` holding the creation status. The creation
 * routine is chosen from the locals `ooc` and `bigmat`:
 *   - ooc set            -> MORSE_Desc_Create_OOC
 *   - otherwise, !bigmat -> MORSE_Desc_Create_User with morse_getaddr_null
 *   - otherwise          -> MORSE_Desc_Create
 *
 * On a status other than MORSE_SUCCESS the macro returns that status from the
 * enclosing function. Requires MB, NB, P, Q, ooc and bigmat in scope (they are
 * declared by PASTE_CODE_IPARAM_LOCALS).
 */
#define PASTE_CODE_ALLOCATE_MATRIX_TILE(_desc_, _cond_, _type_, _type2_, _lda_, _m_, _n_) \
    MORSE_desc_t *_desc_ = NULL;                                        \
    int status ## _desc_ ;                                              \
    if( _cond_ ) {                                                      \
        if (ooc) {                                                      \
            status ## _desc_ = MORSE_Desc_Create_OOC(&(_desc_), _type2_, MB, NB, MB*NB, _lda_, _n_, 0, 0, _m_, _n_, \
                                                     P, Q);             \
        }                                                               \
        else if (!bigmat) {                                             \
            status ## _desc_ = MORSE_Desc_Create_User(&(_desc_), NULL, _type2_, MB, NB, MB*NB, _lda_, _n_, 0, 0, _m_, _n_, \
                                                      P, Q, morse_getaddr_null, NULL, NULL); \
        }                                                               \
        else {                                                          \
            status ## _desc_ = MORSE_Desc_Create(&(_desc_), NULL, _type2_, MB, NB, MB*NB, _lda_, _n_, 0, 0, _m_, _n_, \
                                                 P, Q);                 \
        }                                                               \
        if (status ## _desc_ != MORSE_SUCCESS) return (status ## _desc_); \
    }

/**
 * Destroy a descriptor by passing its address to MORSE_Desc_Destroy.
 *
 * @param _desc_  Descriptor pointer variable (an lvalue; its address is taken).
 */
#define PASTE_CODE_FREE_MATRIX(_desc_) \
    MORSE_Desc_Destroy( &(_desc_) );

/**
 * Declare a buffer `_type_ *_name_` and, if _cond_ is true, allocate
 * _lda_ * _n_ elements for it and fill it with MORSE_Tile_to_Lapack.
 *
 * @param _desc_  Descriptor passed to MORSE_Tile_to_Lapack as the source.
 * @param _name_  Name of the pointer to declare; stays NULL when _cond_ is false.
 * @param _cond_  Allocation and conversion happen only when this is true.
 * @param _type_  Element type of the buffer.
 * @param _lda_   First factor of the element count; also passed to the conversion.
 * @param _n_     Second factor of the element count.
 *
 * The element count is computed in size_t so that the product cannot overflow
 * a narrower signed type when the arguments are plain ints.
 * On allocation failure a message is printed to stderr and the enclosing
 * function returns -1. The macro never frees the buffer; that is left to the
 * caller.
 */
#define PASTE_TILE_TO_LAPACK(_desc_, _name_, _cond_, _type_, _lda_, _n_) \
    _type_ *_name_ = NULL;                                               \
    if ( _cond_ ) {                                                      \
        _name_ = (_type_*)malloc( (size_t)(_lda_) * (size_t)(_n_) * sizeof(_type_) ); \
        if ( ! _name_ ) {                                                \
            fprintf(stderr, "Out of Memory for %s\n", #_name_);          \
            return -1;                                                   \
        }                                                                \
        MORSE_Tile_to_Lapack(_desc_, (void*)_name_, _lda_);              \
    }

/**
 * Declare a buffer `_type_ *_name_` and, if _cond_ is true, allocate
 * _lda_ * _n_ uninitialised elements for it.
 *
 * @param _name_  Name of the pointer to declare; stays NULL when _cond_ is false.
 * @param _cond_  Allocation happens only when this is true.
 * @param _type_  Element type of the buffer.
 * @param _lda_   First factor of the element count.
 * @param _n_     Second factor of the element count.
 *
 * The element count is computed in size_t so that the product cannot overflow
 * a narrower signed type when the arguments are plain ints.
 * On allocation failure a message is printed to stderr and the enclosing
 * function returns -1. The macro never frees the buffer; that is left to the
 * caller.
 */
#define PASTE_CODE_ALLOCATE_MATRIX(_name_, _cond_, _type_, _lda_, _n_)  \
    _type_ *_name_ = NULL;                                              \
    if( _cond_ ) {                                                      \
        _name_ = (_type_*)malloc( (size_t)(_lda_) * (size_t)(_n_) * sizeof(_type_) ); \
        if ( ! _name_ ) {                                               \
            fprintf(stderr, "Out of Memory for %s\n", #_name_);         \
            return -1;                                                  \
        }                                                               \
    }

/**
 * Declare a buffer `_type_ *_name_` and, if _cond_ is true, allocate
 * _lda_ * _n_ elements for it and copy them from _orig_.
 *
 * @param _name_  Name of the pointer to declare; stays NULL when _cond_ is false.
 * @param _cond_  Allocation and copy happen only when this is true.
 * @param _type_  Element type of the buffer.
 * @param _orig_  Source buffer; _lda_ * _n_ elements are read from it.
 * @param _lda_   First factor of the element count.
 * @param _n_     Second factor of the element count.
 *
 * Both the allocation size and the copy size are computed in size_t so that
 * the product cannot overflow a narrower signed type when the arguments are
 * plain ints.
 * On allocation failure a message is printed to stderr and the enclosing
 * function returns -1. The macro never frees the buffer; that is left to the
 * caller.
 */
#define PASTE_CODE_ALLOCATE_COPY(_name_, _cond_, _type_, _orig_, _lda_, _n_) \
    _type_ *_name_ = NULL;                                                   \
    if( _cond_ ) {                                                           \
        _name_ = (_type_*)malloc( (size_t)(_lda_) * (size_t)(_n_) * sizeof(_type_) ); \
        if ( ! _name_ ) {                                                    \
            fprintf(stderr, "Out of Memory for %s\n", #_name_);              \
            return -1;                                                       \
        }                                                                    \
        memcpy(_name_, _orig_, (size_t)(_lda_) * (size_t)(_n_) * sizeof(_type_) ); \
    }

/*********************
 *
 * Macro for trace generation
 *
 */
/**
 * Start runtime statistics and, depending on the run parameters, profiling
 * and the MORSE_BOUND option.
 *
 * Reads `iparam` from the expansion scope:
 *   - iparam[IPARAM_TRACE] == 2  -> RUNTIME_start_profiling()
 *   - iparam[IPARAM_BOUND] != 0  -> MORSE_Enable(MORSE_BOUND)
 *
 * Expands to several statements, not a single one; do not use it as the
 * unbraced body of an if/else or a loop.
 */
#define START_TRACING()                        \
    RUNTIME_start_stats();                     \
    if(iparam[IPARAM_TRACE] == 2) {            \
        RUNTIME_start_profiling();             \
    }                                          \
    if(iparam[IPARAM_BOUND]) {                 \
        MORSE_Enable(MORSE_BOUND);             \
    }

/**
 * Stop runtime statistics and, depending on the run parameters, profiling
 * and the MORSE_BOUND option.
 *
 * Reads `iparam` from the expansion scope:
 *   - iparam[IPARAM_TRACE] == 2  -> RUNTIME_stop_profiling()
 *   - iparam[IPARAM_BOUND] != 0  -> MORSE_Disable(MORSE_BOUND)
 *
 * Expands to several statements, not a single one; do not use it as the
 * unbraced body of an if/else or a loop.
 */
#define STOP_TRACING()                         \
    RUNTIME_stop_stats();                      \
    if(iparam[IPARAM_TRACE] == 2) {            \
        RUNTIME_stop_profiling();              \
    }                                          \
    if(iparam[IPARAM_BOUND]) {                 \
        MORSE_Disable(MORSE_BOUND);            \
    }

/*********************
 *
 * Macro for DAG generation
 *
 */
/*
 * DAG hooks.
 *
 * The variant that toggles MORSE_DAG when iparam[IPARAM_DAG] == 2 is compiled
 * out by the `#if 0`, so START_DAG() and STOP_DAG() currently expand to an
 * empty do/while statement. The expansion already carries its own trailing
 * semicolon.
 */
#if 0
#define START_DAG()                      \
    if ( iparam[IPARAM_DAG] == 2 )       \
        MORSE_Enable(MORSE_DAG);

#define STOP_DAG()                       \
    if ( iparam[IPARAM_DAG] == 2 )       \
        MORSE_Disable(MORSE_DAG);
#else
#define START_DAG() do { } while (0);
#define STOP_DAG()  do { } while (0);
#endif

/*********************
 *
 * Synchronization for distributed computations
 *
 */
/*
 * Distributed-run hooks.
 *
 * When CHAMELEON_USE_MPI is defined, START_DISTRIBUTED() and
 * STOP_DISTRIBUTED() call MORSE_Distributed_start() and
 * MORSE_Distributed_stop(); otherwise they expand to an empty do/while
 * statement. In both cases the expansion already carries its own trailing
 * semicolon.
 */
#if defined(CHAMELEON_USE_MPI)
#define START_DISTRIBUTED()  MORSE_Distributed_start();
#define STOP_DISTRIBUTED()   MORSE_Distributed_stop();
#else
#define START_DISTRIBUTED()  do {} while(0);
#define STOP_DISTRIBUTED()   do {} while(0);
#endif

/*********************
 *
 * General macros for timing
 *
 */
/*
 * Open a timed region.
 *
 * Runs the DAG, tracing and distributed start hooks in that order, then
 * stores the negated current RUNTIME_get_time() in `t`; STOP_TIMING later adds
 * the end time to it. Needs `t` in the expansion scope, and `iparam` because
 * START_TRACING reads it.
 */
#define START_TIMING()       \
    START_DAG();             \
    START_TRACING();         \
    START_DISTRIBUTED();     \
    t = -RUNTIME_get_time();

/*
 * Close a timed region opened by START_TIMING.
 *
 * Runs the distributed stop hook, adds the current RUNTIME_get_time() to `t`,
 * then runs the tracing and DAG stop hooks. When iparam[IPARAM_PROFILE] == 2
 * it also calls the kernel and scheduler profile display routines. Finally
 * the value of `t` is written through the pointer `t_`.
 * Needs `t`, `t_` and `iparam` in the expansion scope.
 */
#define STOP_TIMING()                      \
    STOP_DISTRIBUTED();                    \
    t += RUNTIME_get_time();               \
    STOP_TRACING();                        \
    STOP_DAG();                            \
    if (iparam[IPARAM_PROFILE] == 2) {     \
        RUNTIME_kernelprofile_display();   \
        RUNTIME_schedprofile_display();    \
    }                                      \
    *t_ = t;
#endif /* TIMING_H */