/**
 *
 * @copyright (c) 2009-2014 The University of Tennessee and The University
 *                          of Tennessee Research Foundation.
 *                          All rights reserved.
 * @copyright (c) 2012-2014 Inria. All rights reserved.
 * @copyright (c) 2012-2015 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
 *
 **/

#ifndef TIMING_H
#define TIMING_H

typedef double morse_time_t;
15
static void* morse_getaddr_null(const MORSE_desc_t *A, int m, int n);
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31

/*
 * Indices into the integer parameter array `iparam` used by the timing
 * macros of this header (e.g. iparam[IPARAM_M]).
 *
 * The enumerators rely on implicit numbering starting at 0, so their order
 * is part of the interface: new entries must be inserted just before
 * IPARAM_SIZEOF, which therefore always equals the number of parameters.
 */
enum iparam_timing {
    IPARAM_THRDNBR,        /* Number of cores                               */
    IPARAM_THRDNBR_SUBGRP, /* Number of cores in a subgroup (NUMA node)     */
    IPARAM_SCHEDULER,      /* Which scheduler is selected (dyn, stat)       */
    IPARAM_M,              /* Number of rows of the matrix                  */
    IPARAM_N,              /* Number of columns of the matrix               */
    IPARAM_K,              /* RHS or K                                      */
    IPARAM_LDA,            /* Leading dimension of A                        */
    IPARAM_LDB,            /* Leading dimension of B                        */
    IPARAM_LDC,            /* Leading dimension of C                        */
    IPARAM_IB,             /* Inner-blocking size                           */
    IPARAM_NB,             /* Number of columns in a tile                   */
    IPARAM_MB,             /* Number of rows in a tile                      */
    IPARAM_NITER,          /* Number of iterations of each test             */
    IPARAM_WARMUP,         /* Run one test to load dynamic libraries        */
    IPARAM_BIGMAT,         /* Allocate one big matrix or many small ones    */
    IPARAM_CHECK,          /* Checking activated or not                     */
    IPARAM_VERBOSE,        /* Verbosity level                               */
    IPARAM_AUTOTUNING,     /* Disable/enable autotuning                     */
    IPARAM_INPUTFMT,       /* Input format (used only for getmi/gecfi)      */
    IPARAM_OUTPUTFMT,      /* Output format (used only for getmi/gecfi)     */
    IPARAM_TRACE,          /* Generate trace on the first non-warmup run    */
    IPARAM_DAG,            /* Is the DOT file output required?              */
    IPARAM_ASYNC,          /* Asynchronous calls                            */
    IPARAM_MX,             /* (not documented in the original source)       */
    IPARAM_NX,             /* (not documented in the original source)       */
    IPARAM_RHBLK,          /* Householder reduction parameter for QR/LQ     */
    IPARAM_INPLACE,        /* InPlace/OutOfPlace translation mode           */
    IPARAM_MODE,           /* Eigenvalue generation mode                    */

    IPARAM_INVERSE,
    IPARAM_NCUDAS,
    IPARAM_NMPI,
    IPARAM_P,              /* Parameter for 2D cyclic distribution          */
    IPARAM_Q,              /* Parameter for 2D cyclic distribution          */

    IPARAM_PROGRESS,       /* Use a progress indicator during computations  */
    IPARAM_GEMM3M,         /* Use GEMM3M for complex matrix vector products */
    /* Added for StarPU version */
    IPARAM_PROFILE,
    IPARAM_PRINT_ERRORS,
    IPARAM_PEAK,
    IPARAM_PARALLEL_TASKS,
    IPARAM_NO_CPU,
    IPARAM_BOUND,
    IPARAM_BOUNDDEPS,
    IPARAM_BOUNDDEPSPRIO,
    /* End */
    IPARAM_SIZEOF          /* Number of entries above; keep last            */
};

/*
 * Second set of timing parameter indices, numbered consecutively from 0.
 * NOTE(review): presumably these index a double-valued parameter array, by
 * analogy with iparam_timing -- confirm against the callers. The
 * enumerators keep their historical IPARAM_ prefix.
 */
enum dparam_timing {
    IPARAM_TIME = 0,
    IPARAM_ANORM,
    IPARAM_BNORM,
    IPARAM_XNORM,
    IPARAM_RNORM,
    IPARAM_AinvNORM,
    IPARAM_ESTIMATED_PEAK,
    IPARAM_RES,

    /* Begin section for hydra integration tool */
    /* Maximum value accepted for: ||Ax-b||/N/eps/(||A||||x||+||b||) */
    IPARAM_THRESHOLD_CHECK,
    /* End section for hydra integration tool */

    IPARAM_DNBPARAM /* Number of entries above; keep last */
};

/*
 * Declare the local variables shared by the timing drivers and initialise
 * them from the integer parameter array `iparam` (indexed with the
 * IPARAM_* constants):
 *
 *   t                   declared but left uninitialised
 *   M, N, K, NRHS       problem sizes (NRHS is a copy of K)
 *   LDA, LDB, LDC       leading dimensions, raised to at least M, N and K
 *                       respectively through chameleon_max()
 *   IB, MB, NB, P, Q    copied verbatim from iparam
 *   MT, NT              M/MB and N/NB rounded up; MB and NB must be non-zero
 *   bigmat, check, loud copied from IPARAM_BIGMAT, IPARAM_CHECK and
 *                       IPARAM_VERBOSE
 *
 * The expansion mixes declarations with (void) statements that silence
 * unused-variable warnings, so it must be used at block scope. It needs
 * int64_t (<stdint.h>) and chameleon_max() to be visible at the point of use.
 */
#define PASTE_CODE_IPARAM_LOCALS(iparam)                          \
    double  t;                                                    \
    int64_t M     = (iparam)[IPARAM_M];                           \
    int64_t N     = (iparam)[IPARAM_N];                           \
    int64_t K     = (iparam)[IPARAM_K];                           \
    int64_t NRHS  = K;                                            \
    int64_t LDA   = chameleon_max(M, (iparam)[IPARAM_LDA]);       \
    int64_t LDB   = chameleon_max(N, (iparam)[IPARAM_LDB]);       \
    int64_t LDC   = chameleon_max(K, (iparam)[IPARAM_LDC]);       \
    int64_t IB    = (iparam)[IPARAM_IB];                          \
    int64_t MB    = (iparam)[IPARAM_MB];                          \
    int64_t NB    = (iparam)[IPARAM_NB];                          \
    int64_t P     = (iparam)[IPARAM_P];                           \
    int64_t Q     = (iparam)[IPARAM_Q];                           \
    int64_t MT    = (M%MB==0) ? (M/MB) : (M/MB+1);                \
    int64_t NT    = (N%NB==0) ? (N/NB) : (N/NB+1);                \
    int bigmat    = (iparam)[IPARAM_BIGMAT];                      \
    int check     = (iparam)[IPARAM_CHECK];                       \
    int loud      = (iparam)[IPARAM_VERBOSE];                     \
    (void)M;(void)N;(void)K;(void)NRHS;                           \
    (void)LDA;(void)LDB;(void)LDC;                                \
    (void)IB;(void)MB;(void)NB;(void)P;(void)Q;                   \
    (void)MT;(void)NT;(void)check;(void)loud;(void)bigmat;

/*
 * Paste code that declares the descriptor pointer `_desc_` (initially NULL)
 * and, if `_cond_` is true, creates the descriptor:
 *   - with MORSE_Desc_Create_User() and the morse_getaddr_null callback when
 *     `bigmat` is zero,
 *   - with MORSE_Desc_Create() otherwise.
 *
 * A companion variable named status<_desc_> receives the creation status;
 * any value other than MORSE_SUCCESS is returned from the enclosing
 * function, which must therefore return an int-compatible type.
 *
 * The variables MB, NB, P, Q and bigmat must be in scope (they are declared
 * by PASTE_CODE_IPARAM_LOCALS). `_type_` is accepted but not used.
 */
#define PASTE_CODE_ALLOCATE_MATRIX_TILE(_desc_, _cond_, _type_, _type2_, _lda_, _m_, _n_) \
    MORSE_desc_t *_desc_ = NULL;                                        \
    int status ## _desc_ = MORSE_SUCCESS;                               \
    if( _cond_ ) {                                                      \
        if (!bigmat) {                                                  \
            status ## _desc_ = MORSE_Desc_Create_User(&(_desc_), NULL, _type2_, MB, NB, MB*NB, _lda_, _n_, 0, 0, _m_, _n_, \
                                                      P, Q, morse_getaddr_null, NULL, NULL); \
        } else {                                                        \
            status ## _desc_ = MORSE_Desc_Create(&(_desc_), NULL, _type2_, MB, NB, MB*NB, _lda_, _n_, 0, 0, _m_, _n_, \
                                                 P, Q);                 \
        }                                                               \
        if (status ## _desc_ != MORSE_SUCCESS) return (status ## _desc_); \
    }

/*
 * Hand the address of the descriptor pointer `_desc_` to
 * MORSE_Desc_Destroy(). The expansion already ends with a semicolon.
 */
#define PASTE_CODE_FREE_MATRIX(_desc_) \
    MORSE_Desc_Destroy(&(_desc_));

/*
 * Declare `_type_ *_name_` (initially NULL) and, if `_cond_` is true,
 * allocate room for `_lda_` x `_n_` elements of `_type_` and pass the buffer
 * to MORSE_Tile_to_Lapack() together with `_desc_` and `_lda_`.
 *
 * The element count is computed in size_t so that int-typed dimensions
 * cannot overflow before being multiplied by sizeof(_type_).
 *
 * On allocation failure a message is printed on stderr and the enclosing
 * function returns -1. The buffer is not released by this macro.
 */
#define PASTE_TILE_TO_LAPACK(_desc_, _name_, _cond_, _type_, _lda_, _n_) \
    _type_ *_name_ = NULL;                                               \
    if ( _cond_ ) {                                                      \
        _name_ = (_type_*)malloc( (size_t)(_lda_) * (size_t)(_n_) * sizeof(_type_) ); \
        if ( ! _name_ ) {                                                \
            fprintf(stderr, "Out of Memory for %s\n", #_name_);          \
            return -1;                                                   \
        }                                                                \
        MORSE_Tile_to_Lapack(_desc_, (void*)_name_, _lda_);              \
    }

/*
 * Declare `_type_ *_name_` (initially NULL) and, if `_cond_` is true,
 * allocate an uninitialised buffer of `_lda_` x `_n_` elements of `_type_`.
 *
 * The element count is computed in size_t so that int-typed dimensions
 * cannot overflow before being multiplied by sizeof(_type_).
 *
 * On allocation failure a message is printed on stderr and the enclosing
 * function returns -1. The buffer is not released by this macro.
 */
#define PASTE_CODE_ALLOCATE_MATRIX(_name_, _cond_, _type_, _lda_, _n_)  \
    _type_ *_name_ = NULL;                                              \
    if( _cond_ ) {                                                      \
        _name_ = (_type_*)malloc( (size_t)(_lda_) * (size_t)(_n_) * sizeof(_type_) ); \
        if ( ! _name_ ) {                                               \
            fprintf(stderr, "Out of Memory for %s\n", #_name_);         \
            return -1;                                                  \
        }                                                               \
    }

/*
 * Declare `_type_ *_name_` (initially NULL) and, if `_cond_` is true,
 * allocate a buffer of `_lda_` x `_n_` elements of `_type_` and fill it with
 * a byte-wise copy of the same number of elements read from `_orig_`.
 *
 * The byte count is computed in size_t so that int-typed dimensions cannot
 * overflow before being multiplied by sizeof(_type_).
 *
 * On allocation failure a message is printed on stderr and the enclosing
 * function returns -1. The buffer is not released by this macro.
 */
#define PASTE_CODE_ALLOCATE_COPY(_name_, _cond_, _type_, _orig_, _lda_, _n_) \
    _type_ *_name_ = NULL;                                                   \
    if( _cond_ ) {                                                           \
        _name_ = (_type_*)malloc( (size_t)(_lda_) * (size_t)(_n_) * sizeof(_type_) ); \
        if ( ! _name_ ) {                                                    \
            fprintf(stderr, "Out of Memory for %s\n", #_name_);              \
            return -1;                                                       \
        }                                                                    \
        memcpy(_name_, _orig_, (size_t)(_lda_) * (size_t)(_n_) * sizeof(_type_) ); \
    }

/*********************
 *
 * Macro for trace generation
 *
 */
/*
 * Start tracing for the current run:
 *   - calls RUNTIME_start_profiling() when iparam[IPARAM_TRACE] == 2;
 *   - calls MORSE_Enable(MORSE_BOUND) when iparam[IPARAM_BOUND] is non-zero.
 *
 * `iparam` must be in scope. Both tests are wrapped in a single
 * do { } while(0) statement so that an unbraced `if (...) START_TRACING();`
 * guards the whole expansion. The trailing semicolon follows the convention
 * of the other macros in this file, so call sites may omit their own.
 */
#define START_TRACING()                        \
    do {                                       \
        if(iparam[IPARAM_TRACE] == 2) {        \
            RUNTIME_start_profiling();         \
        }                                      \
        if(iparam[IPARAM_BOUND]) {             \
            MORSE_Enable(MORSE_BOUND);         \
        }                                      \
    } while(0);

/*
 * Stop tracing for the current run:
 *   - calls RUNTIME_stop_profiling() when iparam[IPARAM_TRACE] == 2;
 *   - calls MORSE_Disable(MORSE_BOUND) when iparam[IPARAM_BOUND] is non-zero.
 *
 * `iparam` must be in scope. Both tests are wrapped in a single
 * do { } while(0) statement so that an unbraced `if (...) STOP_TRACING();`
 * guards the whole expansion. The trailing semicolon follows the convention
 * of the other macros in this file, so call sites may omit their own.
 */
#define STOP_TRACING()                         \
    do {                                       \
        if(iparam[IPARAM_TRACE] == 2) {        \
            RUNTIME_stop_profiling();          \
        }                                      \
        if(iparam[IPARAM_BOUND]) {             \
            MORSE_Disable(MORSE_BOUND);        \
        }                                      \
    } while(0);

/*********************
 *
 * Macro for DAG generation
 *
 */
#if 0
/* Disabled variant: toggles MORSE_DAG when iparam[IPARAM_DAG] == 2. */
#define START_DAG()                       \
    if ( iparam[IPARAM_DAG] == 2 ) {      \
        MORSE_Enable(MORSE_DAG);          \
    }

#define STOP_DAG()                        \
    if ( iparam[IPARAM_DAG] == 2 ) {      \
        MORSE_Disable(MORSE_DAG);         \
    }
#else
/*
 * Active variant: both macros expand to an empty statement. The trailing
 * semicolon is part of the expansion, so call sites may omit their own.
 */
#define START_DAG()     do { } while (0);
#define STOP_DAG()      do { } while (0);
#endif

/*********************
 *
 * Synchronization for distributed computations
 *
 */
/*
 * With CHAMELEON_USE_MPI defined, the macros call MORSE_Distributed_start()
 * and MORSE_Distributed_stop(); otherwise they expand to an empty statement.
 * In both cases the trailing semicolon is part of the expansion.
 */
#if defined(CHAMELEON_USE_MPI)
#define START_DISTRIBUTED()  MORSE_Distributed_start();
#define STOP_DISTRIBUTED()   MORSE_Distributed_stop();
#else
#define START_DISTRIBUTED()  do {} while(0);
#define STOP_DISTRIBUTED()   do {} while(0);
#endif

/*********************
 *
 * General Macros for timing
 *
 */
/*
 * Open a timed region: store the negated value of RUNTIME_get_time() in
 * `t`, then run START_DAG(), START_TRACING() and START_DISTRIBUTED() in
 * that order. STOP_TIMING() later adds the end time to `t`, which leaves
 * the elapsed time in it.
 *
 * `t` must be in scope, together with whatever the three START_* macros
 * reference. The body is a single do { } while(0) statement, so an unbraced
 * `if (...) START_TIMING();` guards the whole expansion. The trailing
 * semicolon follows the convention of the other macros in this file.
 */
#define START_TIMING()                  \
    do {                                \
        t = -RUNTIME_get_time();        \
        START_DAG();                    \
        START_TRACING();                \
        START_DISTRIBUTED();            \
    } while(0);

/*
 * Close a timed region opened by START_TIMING():
 *   1. add RUNTIME_get_time() to `t`;
 *   2. when iparam[IPARAM_PROFILE] == 2, call
 *      RUNTIME_kernelprofile_display() and RUNTIME_schedprofile_display();
 *   3. store `t` through the pointer `t_`;
 *   4. run STOP_DISTRIBUTED(), STOP_TRACING() and STOP_DAG() in that order.
 *
 * `t`, `t_` and `iparam` must be in scope. The body is a single
 * do { } while(0) statement, so an unbraced `if (...) STOP_TIMING();`
 * guards the whole expansion. The trailing semicolon follows the convention
 * of the other macros in this file.
 */
#define STOP_TIMING()                            \
    do {                                         \
        t += RUNTIME_get_time();                 \
        if (iparam[IPARAM_PROFILE] == 2) {       \
            RUNTIME_kernelprofile_display();     \
            RUNTIME_schedprofile_display();      \
        }                                        \
        *t_ = t;                                 \
        STOP_DISTRIBUTED();                      \
        STOP_TRACING();                          \
        STOP_DAG();                              \
    } while(0);

#endif /* TIMING_H */