/**
 *
 * @copyright (c) 2009-2014 The University of Tennessee and The University
 *                          of Tennessee Research Foundation.
 *                          All rights reserved.
 * @copyright (c) 2012-2016 Inria. All rights reserved.
 * @copyright (c) 2012-2015 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
 *
 **/

/**
 *
 * @file timing.c
 *
 *  MORSE auxiliary routines
 *  MORSE is a software package provided by Univ. of Tennessee,
 *  Univ. of California Berkeley and Univ. of Colorado Denver
 *
 * @version 0.9.0
 * @author Mathieu Faverge
 * @author Raphael Boucherie
 * @author Dulceneia Becker
 * @author Cedric Castagnede
 * @date 2010-11-15
 *
 **/

#if defined( _WIN32 ) || defined( _WIN64 )
#define int64_t __int64
#endif

/* Define these so that the Microsoft VC compiler stops complaining
   about scanf and friends */
#define _CRT_SECURE_NO_DEPRECATE
#define _CRT_SECURE_NO_WARNINGS

#include <errno.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#if defined( _WIN32 ) || defined( _WIN64 )
#include <windows.h>
#else  /* Non-Windows */
#include <unistd.h>
#include <sys/resource.h>
#endif

#include <morse.h>
#if !defined(CHAMELEON_SIMULATION)
#include <coreblas/lapacke.h>
#include <coreblas.h>
#endif
#include "flops.h"
#include "timing.h"
#include "control/auxiliary.h"

#if defined(CHAMELEON_USE_MPI)
#include <mpi.h>
#endif /* defined(CHAMELEON_USE_MPI) */

#if defined(CHAMELEON_SCHED_STARPU)
#include <starpu.h>
#endif /* defined(CHAMELEON_SCHED_STARPU) */

#if defined(CHAMELEON_HAVE_GETOPT_H)
#include <getopt.h>
#endif /* defined(CHAMELEON_HAVE_GETOPT_H) */

static int RunTest(int *iparam, _PREC *dparam, double *t_);
/**
 * Address callback that never resolves to any storage.
 *
 * @param A  descriptor (ignored)
 * @param m  row index (ignored)
 * @param n  column index (ignored)
 * @return   always NULL
 */
static inline void* morse_getaddr_null(const MORSE_desc_t *A, int m, int n)
{
    /* The arguments exist only to satisfy the callback signature. */
    (void)A;
    (void)m;
    (void)n;
    return NULL;
}

/* Four-integer seed array with external linkage. It is not referenced in the
 * visible part of this file, so it is presumably consumed by the timed
 * routines in other translation units -- TODO confirm. */
int ISEED[4] = {0,0,0,1};   /* initial seed for zlarnv() */

static int
Test(int64_t n, int *iparam) {
    int      i, j, iter;
    int      thrdnbr, niter;
    int64_t  M, N, K, NRHS;
    double  *t;
87
#if defined(CHAMELEON_SIMULATION)
88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171
    _PREC    eps = 0.;
#else
    _PREC    eps = _LAMCH( 'e' );
#endif
    _PREC    dparam[IPARAM_DNBPARAM];
    double   fmuls, fadds, fp_per_mul, fp_per_add;
    double   sumgf, sumgf2, sumt, sd, flops, gflops;
    char    *s;
    char    *env[] = {
        "OMP_NUM_THREADS",
        "MKL_NUM_THREADS",
        "GOTO_NUM_THREADS",
        "ACML_NUM_THREADS",
        "ATLAS_NUM_THREADS",
        "BLAS_NUM_THREADS", ""
    };
    int gnuplot = 0;

/*
 * if hres = 0 then the test succeed
 * if hres = n then the test failed n times
 */
    int hres = 0;

    memset( &dparam, 0, IPARAM_DNBPARAM * sizeof(_PREC) );
    dparam[IPARAM_THRESHOLD_CHECK] = 100.0;

    thrdnbr = iparam[IPARAM_THRDNBR];
    niter   = iparam[IPARAM_NITER];

    M    = iparam[IPARAM_M];
    N    = iparam[IPARAM_N];
    K    = iparam[IPARAM_K];
    NRHS = K;
    (void)M;(void)N;(void)K;(void)NRHS;

    if ( (n < 0) || (thrdnbr < 0 ) ) {
        if (gnuplot && (MORSE_My_Mpi_Rank() == 0) ) {
            printf( "set title '%d_NUM_THREADS: ", thrdnbr );
            for (i = 0; env[i][0]; ++i) {
                s = getenv( env[i] );

                if (i) printf( " " ); /* separating space */

                for (j = 0; j < 5 && env[i][j] && env[i][j] != '_'; ++j)
                    printf( "%c", env[i][j] );

                if (s)
                    printf( "=%s", s );
                else
                    printf( "->%s", "?" );
            }
            printf( "'\n" );
            printf( "%s\n%s\n%s\n%s\n%s%s%s\n",
                    "set xlabel 'Matrix size'",
                    "set ylabel 'Gflop/s'",
                    "set key bottom",
                    gnuplot > 1 ? "set terminal png giant\nset output 'timeplot.png'" : "",
                    "plot '-' using 1:5 title '", _NAME, "' with linespoints" );
        }
        return 0;
    }

    if ( MORSE_My_Mpi_Rank() == 0)
        printf( "%7d %7d %7d ", iparam[IPARAM_M], iparam[IPARAM_N], iparam[IPARAM_K] );
    fflush( stdout );

    t = (double*)malloc(niter*sizeof(double));
    memset(t, 0, niter*sizeof(double));

    if (sizeof(_TYPE) == sizeof(_PREC)) {
        fp_per_mul = 1;
        fp_per_add = 1;
    } else {
        fp_per_mul = 6;
        fp_per_add = 2;
    }

    fadds = (double)(_FADDS);
    fmuls = (double)(_FMULS);
    flops = 1e-9 * (fmuls * fp_per_mul + fadds * fp_per_add);
    gflops = 0.0;

    if ( iparam[IPARAM_WARMUP] ) {
172 173
      int status = RunTest( iparam, dparam, &(t[0]));
      if (status != MORSE_SUCCESS) return status;
174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190
    }

    sumgf  = 0.0;
    double sumgf_upper  = 0.0;
    sumgf2 = 0.0;
    sumt   = 0.0;

    for (iter = 0; iter < niter; iter++)
    {
        if( iter == 0 ) {
          if ( iparam[IPARAM_TRACE] )
            iparam[IPARAM_TRACE] = 2;
          if ( iparam[IPARAM_DAG] )
            iparam[IPARAM_DAG] = 2;
          if ( iparam[IPARAM_PROFILE] )
            iparam[IPARAM_PROFILE] = 2;

191 192
          int status = RunTest( iparam, dparam, &(t[iter]));
          if (status != MORSE_SUCCESS) return status;
193 194 195 196 197

          iparam[IPARAM_TRACE] = 0;
          iparam[IPARAM_DAG] = 0;
          iparam[IPARAM_PROFILE] = 0;
        }
198 199 200 201
        else {
          int status = RunTest( iparam, dparam, &(t[iter]));
          if (status != MORSE_SUCCESS) return status;
        }
202 203
        gflops = flops / t[iter];

204
#if defined (CHAMELEON_SCHED_STARPU)
205
        /* TODO: create chameleon interface encapsulating this instead */
206 207
        if (iparam[IPARAM_BOUND])
        {
208 209 210
            double upper_gflops = 0.0;
            double tmin = 0.0;
            double integer_tmin = 0.0;
211 212 213
            starpu_bound_compute(&tmin, &integer_tmin, 0);
            upper_gflops  = (flops / (tmin / 1000.0));
            sumgf_upper += upper_gflops;
214 215 216 217 218 219 220 221 222 223 224 225 226
        }
#endif
        sumt   += t[iter];
        sumgf  += gflops;
        sumgf2 += gflops*gflops;
    }

    gflops = sumgf / niter;
    sd = sqrt((sumgf2 - (sumgf*sumgf)/niter)/niter);

    if ( MORSE_My_Mpi_Rank() == 0) {
        printf( "%9.3f %9.2f +-%7.2f  ", sumt/niter, gflops, sd);

227
        if (iparam[IPARAM_BOUND])
228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264
            printf(" %9.2f",  sumgf_upper/niter);

        if ( iparam[IPARAM_PEAK] )
        {
            if (dparam[IPARAM_ESTIMATED_PEAK]<0.0f)
                printf("  n/a    n/a   ");
            else
                printf("  %5.2f%%  %9.2f ", 100.0f*(gflops/dparam[IPARAM_ESTIMATED_PEAK]), dparam[IPARAM_ESTIMATED_PEAK]);
        }

        if ( iparam[IPARAM_CHECK] ){
            hres = ( dparam[IPARAM_RES] / n / eps / (dparam[IPARAM_ANORM] * dparam[IPARAM_XNORM] + dparam[IPARAM_BNORM] ) > dparam[IPARAM_THRESHOLD_CHECK] );

            if (hres)
                printf( "%8.5e %8.5e %8.5e %8.5e                       %8.5e FAILURE",
                    dparam[IPARAM_RES], dparam[IPARAM_ANORM], dparam[IPARAM_XNORM], dparam[IPARAM_BNORM],
                    dparam[IPARAM_RES] / n / eps / (dparam[IPARAM_ANORM] * dparam[IPARAM_XNORM] + dparam[IPARAM_BNORM] ));
            else
                printf( "%8.5e %8.5e %8.5e %8.5e                       %8.5e SUCCESS",
                    dparam[IPARAM_RES], dparam[IPARAM_ANORM], dparam[IPARAM_XNORM], dparam[IPARAM_BNORM],
                    dparam[IPARAM_RES] / n / eps / (dparam[IPARAM_ANORM] * dparam[IPARAM_XNORM] + dparam[IPARAM_BNORM] ));
        }

        if ( iparam[IPARAM_INVERSE] )
            printf( " %8.5e %8.5e %8.5e     %8.5e",
                    dparam[IPARAM_RNORM], dparam[IPARAM_ANORM], dparam[IPARAM_AinvNORM],
                    dparam[IPARAM_RNORM] /((dparam[IPARAM_ANORM] * dparam[IPARAM_AinvNORM])*n*eps));

        printf("\n");

        fflush( stdout );
    }
    free(t);

    return hres;
}

/**
 * Tell whether string s begins with prefix.
 *
 * @param s       NUL-terminated string to inspect
 * @param prefix  NUL-terminated prefix to look for
 * @return 1 if the first strlen(prefix) characters of s equal prefix
 *         (always the case for an empty prefix), 0 otherwise
 */
static inline int
startswith(const char *s, const char *prefix) {
    return strncmp( s, prefix, strlen( prefix ) ) == 0;
}

/*
 * Read a decimal integer from the first len characters of text. At most 20
 * characters are considered; the rest is ignored. Returns what sscanf()
 * returns, so anything below 1 means that no number was read.
 */
static int
scan_int_prefix(const char *text, int len, int *value)
{
    char token[21];
    int  kept = (len > 20) ? 20 : len;

    memcpy( token, text, (size_t)kept );
    token[kept] = '\0';
    return sscanf( token, "%d", value );
}

/**
 * Parse a size range written as "Start", "Start:Stop" or "Start:Stop:Step".
 *
 *  - "Start":           step is Start/10 (at least 1), stop is Start + 10*step.
 *  - "Start:Stop":      step is (Stop-Start)/10 (at least 1).
 *  - "Start:Stop:Step": all three values are taken from the string.
 *
 * Start and Step must be at least 1 and Stop must not be smaller than Start.
 *
 * @param range    NUL-terminated range specification
 * @param start_p  receives the first value
 * @param stop_p   receives the last value
 * @param step_p   receives the increment
 * @return 0 on success; -1 on any parse or consistency error, in which case
 *         none of the output parameters is written
 */
static int
get_range(char *range, int *start_p, int *stop_p, int *step_p) {
    const char *first_colon  = strchr( range, ':' );
    const char *second_colon = NULL;
    const char *cursor;
    int ncolons = 0;
    int lo = 1000, hi = 10000, inc = 1000;

    for (cursor = first_colon; cursor != NULL; cursor = strchr( cursor + 1, ':' ))
        ncolons++;

    switch (ncolons) {
    case 0:
        /* A single number: derive ten steps above it. */
        if (sscanf( range, "%d", &lo ) < 1 || lo < 1)
            return -1;
        inc = lo / 10;
        if (inc < 1)
            inc = 1;
        hi = lo + 10 * inc;
        break;

    case 1:
        /* The stop value is validated first, then the start value. */
        if (sscanf( first_colon + 1, "%d", &hi ) < 1 || hi < 1)
            return -1;
        if (scan_int_prefix( range, (int)(first_colon - range), &lo ) < 1 || lo > hi || lo < 1)
            return -1;

        /* Ten steps or fewer. */
        inc = (hi - lo) / 10;
        if (inc < 1)
            inc = 1;
        break;

    case 2:
        if (scan_int_prefix( range, (int)(first_colon - range), &lo ) < 1 || lo < 1)
            return -1;

        second_colon = strchr( first_colon + 1, ':' );
        if (scan_int_prefix( first_colon + 1, (int)(second_colon - (first_colon + 1)), &hi ) < 1 || hi < lo)
            return -1;

        if (sscanf( second_colon + 1, "%d", &inc ) < 1 || inc < 1)
            return -1;
        break;

    default:
        return -1;
    }

    *start_p = lo;
    *stop_p  = hi;
    *step_p  = inc;

    return 0;
}

/**
 * Print the command-line usage text on standard output.
 *
 * @param prog_name  program name shown on the usage line
 */
static void
show_help(char *prog_name) {
    printf( "Usage:\n%s [options]\n\n", prog_name );
    printf( "Options are:\n"
            "  -h  --help               Show this help\n"
            "\n"
            "  Machine parameters:\n"
            "    -t, --threads=x        Number of CPU workers (default: automatic detection through runtime)\n"
            "    -g, --gpus=x           Number of GPU workers (default: 0)\n"
            "    -P, --P=x              Rows (P) in the PxQ process grid (default: 1)\n"
            "        --nocpu            All GPU kernels are exclusively executed on GPUs (default: 0)\n"
            "\n"
            "  Matrix parameters:\n"
            "    -m, --m, --M=x         Dimension (M) of the matrices (default: N)\n"
            "    -n, --n, --N=x         Dimension (N) of the matrices\n"
            "    -N, --n_range=R        Range of N values\n"
            "                           with R=Start:Stop:Step (default: 500:5000:500)\n"
            "    -k, --k, --K, --nrhs=x Dimension (K) of the matrices or number of right-hand sides (default: 1)\n"
            "    -b, --nb=x             NB size. (default: 320)\n"
            "    -i, --ib=x             IB size. (default: 32)\n"
            /* TODO: document -x, --mx=x */
            /* TODO: document -X, --nx=x */
            "\n"
            "  Check/prints:\n"
            "        --niter=x          Number of iterations performed for each test (default: 1)\n"
            "    -W, --nowarnings       Do not show warnings\n"
            "    -w, --nowarmup         Cancel the warmup run to pre-load libraries\n"
            "    -c, --check            Check result\n"
            "    -C, --inv              Check on inverse\n"
            "        --mode=x           Change xLATMS matrix mode generation for SVD/EVD (default: 4)\n"
            "                           Must be between 0 and 20 included\n"
            "\n"
            "  Profiling:\n"
            "    -T, --trace            Enable trace generation\n"
            "        --progress         Display progress indicator\n"
            "    -d, --dag              Enable DAG generation\n"
            "                           Generates a dot_dag_file.dot.\n"
            "    -p, --profile          Print profiling information\n"
            "\n"
            "  HQR options:\n"
            "    -a, --qr_a, --rhblk=N  If N > 0, enable Householder mode for QR and LQ factorization\n"
            "                           N is the size of each subdomain (default: -1)\n"
            "    -l, --llvl=x           Tree used for low level reduction inside nodes (default: -1)\n"
            "    -L, --hlvl=x           Tree used for high level reduction between nodes, only if P > 1 (default: -1).\n"
            "                           (-1: Automatic, 0: Flat, 1: Greedy, 2: Fibonacci, 3: Binary, 4: Replicated greedy)\n"
            "    -D, --domino           Enable the domino between upper and lower trees.\n"
            "\n"
            "  Advanced options\n"
            "        --nobigmat         Disable single large matrix allocation for multiple tiled allocations\n"
            "    -s, --sync             Enable synchronous calls in wrapper function such as POTRI\n"
            "    -o, --ooc              Enable out-of-core (available only with StarPU)\n"
            "    -G, --gemm3m           Use gemm3m complex method\n"
            /* TODO: document --peak */
            "        --bound            Compare result to area bound\n"
            "\n");
}


static void
print_header(char *prog_name, int * iparam) {
    const char *bound_header   = iparam[IPARAM_BOUND]   ? "   thGflop/s" : "";
    const char *check_header   = iparam[IPARAM_CHECK]   ? "     ||Ax-b||       ||A||       ||x||       ||b|| ||Ax-b||/N/eps/(||A||||x||+||b||)  RETURN" : "";
    const char *inverse_header = iparam[IPARAM_INVERSE] ? " ||I-A*Ainv||       ||A||    ||Ainv||       ||Id - A*Ainv||/((||A|| ||Ainv||).N.eps)" : "";
    const char *peak_header    = iparam[IPARAM_PEAK]    ? "  (% of peak)  peak" : "";
405
#if defined(CHAMELEON_SIMULATION)
406 407 408 409 410 411
    _PREC    eps = 0.;
#else
    _PREC    eps = _LAMCH( 'e' );
#endif

    printf( "#\n"
412
            "# CHAMELEON %d.%d.%d, %s\n"
413 414
            "# Nb threads: %d\n"
            "# Nb GPUs:    %d\n"
415 416 417 418
#if defined(CHAMELEON_USE_MPI)
            "# Nb mpi:     %d\n"
            "# PxQ:        %dx%d\n"
#endif
419 420 421 422
            "# NB:         %d\n"
            "# IB:         %d\n"
            "# eps:        %e\n"
            "#\n",
423 424 425
            CHAMELEON_VERSION_MAJOR,
            CHAMELEON_VERSION_MINOR,
            CHAMELEON_VERSION_MICRO,
426 427 428
            prog_name,
            iparam[IPARAM_THRDNBR],
            iparam[IPARAM_NCUDAS],
429 430 431 432
#if defined(CHAMELEON_USE_MPI)
            iparam[IPARAM_NMPI],
            iparam[IPARAM_P], iparam[IPARAM_Q],
#endif
433 434 435 436 437 438 439 440 441
            iparam[IPARAM_NB],
            iparam[IPARAM_IB],
            eps );

    printf( "#     M       N  K/NRHS   seconds   Gflop/s Deviation%s%s%s\n",
            bound_header, peak_header, iparam[IPARAM_INVERSE] ? inverse_header : check_header);
    return;
}

/*
 * Short-option specification passed to getopt()/getopt_long(); a trailing ':'
 * marks an option that takes an argument. The digit codes ('1'-'5', '8', '9')
 * are the values returned for options that the help text lists in long form
 * only (or, for --peak, does not list yet).
 */
#define GETOPT_STRING "ht:g:P:8M:m:N:n:K:k:b:i:x:X:1:WwcCT2dpa:l:L:D9:3soG45"

#if defined(CHAMELEON_HAVE_GETOPT_LONG)
/*
 * Long-option table for getopt_long(). Every entry maps onto a character of
 * GETOPT_STRING so that long and short spellings share the same case in
 * parse_arguments(). The table is never modified, hence const.
 */
static const struct option long_options[] =
{
    {"help",          no_argument,       0,      'h'},
    /* Configuration */
    {"threads",       required_argument, 0,      't'},
    {"gpus",          required_argument, 0,      'g'},
    {"P",             required_argument, 0,      'P'},
    {"nocpu",         no_argument,       0,      '8'},
    /* Matrix parameters */
    {"M",             required_argument, 0,      'm'},
    {"m",             required_argument, 0,      'm'},
    {"N",             required_argument, 0,      'n'},
    {"n",             required_argument, 0,      'n'},
    {"n_range",       required_argument, 0,      'N'},
    {"K",             required_argument, 0,      'K'},
    {"k",             required_argument, 0,      'k'},
    {"nrhs",          required_argument, 0,      'k'},
    {"nb",            required_argument, 0,      'b'},
    {"ib",            required_argument, 0,      'i'},
    {"mx",            required_argument, 0,      'x'},
    {"nx",            required_argument, 0,      'X'},
    /* Check/prints */
    {"niter",         required_argument, 0,      '1'},
    {"nowarnings",    no_argument,       0,      'W'},
    {"nowarmup",      no_argument,       0,      'w'},
    {"check",         no_argument,       0,      'c'},
    {"inv",           no_argument,       0,      'C'},
    /* Profiling */
    {"trace",         no_argument,       0,      'T'},
    {"progress",      no_argument,       0,      '2'},
    {"dag",           no_argument,       0,      'd'},
    {"profile",       no_argument,       0,      'p'},
    /* HQR options */
    {"rhblk",         required_argument, 0,      'a'},
    {"qr_a",          required_argument, 0,      'a'},
    {"llvl",          required_argument, 0,      'l'},
    {"hlvl",          required_argument, 0,      'L'},
    {"domino",        no_argument,       0,      'D'},
    /* Other */
    {"mode",          required_argument, 0,      '9'},
    {"nobigmat",      no_argument,       0,      '3'},
    {"sync",          no_argument,       0,      's'},
    {"ooc",           no_argument,       0,      'o'},
    {"gemm3m",        no_argument,       0,      'G'},
    {"peak",          no_argument,       0,      '4'},
    {"bound",         no_argument,       0,      '5'},
    {0, 0, 0, 0}
};
#endif  /* defined(CHAMELEON_HAVE_GETOPT_LONG) */

static void
set_iparam_default(int *iparam){
496 497 498 499 500 501 502 503 504 505 506

    memset(iparam, 0, IPARAM_SIZEOF*sizeof(int));

    iparam[IPARAM_THRDNBR       ] = -1;
    iparam[IPARAM_THRDNBR_SUBGRP] = 1;
    iparam[IPARAM_M             ] = -1;
    iparam[IPARAM_N             ] = 500;
    iparam[IPARAM_K             ] = 1;
    iparam[IPARAM_LDA           ] = -1;
    iparam[IPARAM_LDB           ] = -1;
    iparam[IPARAM_LDC           ] = -1;
BOUCHERIE Raphael's avatar
BOUCHERIE Raphael committed
507 508
    iparam[IPARAM_MB            ] = 320;
    iparam[IPARAM_NB            ] = 320;
509 510
    iparam[IPARAM_IB            ] = 32;
    iparam[IPARAM_NITER         ] = 1;
511
    iparam[IPARAM_WARMUP        ] = 1;
512
    iparam[IPARAM_BIGMAT        ] = 1;
513 514 515 516 517 518
    iparam[IPARAM_ASYNC         ] = 1;
    iparam[IPARAM_MX            ] = -1;
    iparam[IPARAM_NX            ] = -1;
    iparam[IPARAM_MX            ] = -1;
    iparam[IPARAM_NX            ] = -1;
    iparam[IPARAM_INPLACE       ] = MORSE_OUTOFPLACE;
519
    iparam[IPARAM_NMPI          ] = 1;
520 521
    iparam[IPARAM_P             ] = 1;
    iparam[IPARAM_Q             ] = 1;
522
    iparam[IPARAM_PRINT_WARNINGS] = 1;
BOUCHERIE Raphael's avatar
BOUCHERIE Raphael committed
523 524 525 526 527
    iparam[IPARAM_LOWLVL_TREE   ] = -1;
    iparam[IPARAM_HIGHLVL_TREE  ] = -1;
    iparam[IPARAM_QR_TS_SZE     ] = -1;
    iparam[IPARAM_QR_HLVL_SZE   ] = -1;
    iparam[IPARAM_QR_DOMINO     ] = -1;
528 529
}

/**
 * Convert the current getopt argument (optarg) to an int.
 *
 * The whole argument must be a base-10 integer that fits in an int;
 * otherwise a message naming the offending option is written to stderr and
 * the program exits with EXIT_FAILURE.
 *
 * @param long_index  index into long_options of the option being parsed, or
 *                    a negative value when the short form was used (only
 *                    consulted when getopt_long support is compiled in)
 * @param opt_char    short option character, used in the error message
 * @return the parsed value
 */
static inline int
read_integer_from_options(int long_index, int opt_char)
{
    char *endptr;
    long int value;
    (void) long_index;

    /* strtol reports overflow only through errno, so reset it first. */
    errno = 0;
    value = strtol(optarg, &endptr, 10);
    if ( *optarg == '\0' || *endptr != '\0' ) {
#ifdef CHAMELEON_HAVE_GETOPT_LONG
        if ( long_index < 0 ) {
#endif
            fprintf(stderr, "Invalid numeric value '%s' for '-%c' parameter\n", optarg, opt_char);
#ifdef CHAMELEON_HAVE_GETOPT_LONG
        } else {
            fprintf(stderr, "Invalid numeric value '%s' for '--%s' parameter\n", optarg, long_options[long_index].name);
        }
#endif
        exit(EXIT_FAILURE);
    }
    /* The ERANGE test catches overflow on platforms where long is no wider
       than int, where the INT_MAX/INT_MIN comparison alone can never fire.
       The user's text is printed because the converted value is clamped. */
    if ( errno == ERANGE || value > INT_MAX || value < INT_MIN ) {
        fprintf(stderr, "Out of range integer '%s'\n", optarg);
        exit(EXIT_FAILURE);
    }
    return (int)value;
}

void
parse_arguments(int *_argc, char ***_argv, int *iparam, int *start, int *stop, int*step)
{
BOUCHERIE Raphael's avatar
BOUCHERIE Raphael committed
560
    int opt = -1;
561 562 563 564
    int c;
    int argc = *_argc;
    char **argv = *_argv;

565
    optind = 1;
566
    do {
BOUCHERIE Raphael's avatar
BOUCHERIE Raphael committed
567
#if defined(CHAMELEON_HAVE_GETOPT_LONG)
BOUCHERIE Raphael's avatar
BOUCHERIE Raphael committed
568
        opt = -1;
569 570 571 572 573
        c = getopt_long(argc, argv, GETOPT_STRING,
                             long_options, &opt);
#else
        c = getopt(argc, argv, GETOPT_STRING);
        (void) opt;
BOUCHERIE Raphael's avatar
BOUCHERIE Raphael committed
574
#endif  /* defined(CHAMELEON_HAVE_GETOPT_LONG) */
575 576 577

        switch(c)
        {
BOUCHERIE Raphael's avatar
BOUCHERIE Raphael committed
578
        // Configuration
BOUCHERIE Raphael's avatar
BOUCHERIE Raphael committed
579 580 581
        case 't' : iparam[IPARAM_THRDNBR       ] = read_integer_from_options(opt, c); break;
        case 'g' : iparam[IPARAM_NCUDAS        ] = read_integer_from_options(opt, c); break;
        case 'P' : iparam[IPARAM_P             ] = read_integer_from_options(opt, c); break;
BOUCHERIE Raphael's avatar
BOUCHERIE Raphael committed
582 583
        case '8' : iparam[IPARAM_NO_CPU        ] = 1; break;
        // Matrix parameters
BOUCHERIE Raphael's avatar
Minor  
BOUCHERIE Raphael committed
584
        case 'M' :
BOUCHERIE Raphael's avatar
BOUCHERIE Raphael committed
585 586
        case 'm' : iparam[IPARAM_M             ] = read_integer_from_options(opt, c); break;
        case 'n' : iparam[IPARAM_N             ] = read_integer_from_options(opt, c); break;
BOUCHERIE Raphael's avatar
BOUCHERIE Raphael committed
587
        case 'N' : get_range(optarg, start, stop, step); break;
BOUCHERIE Raphael's avatar
Minor  
BOUCHERIE Raphael committed
588
        case 'K' :
BOUCHERIE Raphael's avatar
BOUCHERIE Raphael committed
589 590 591 592 593 594
        case 'k' : iparam[IPARAM_K             ] = read_integer_from_options(opt, c); break;
        case 'b' : iparam[IPARAM_NB            ] = read_integer_from_options(opt, c);
                   iparam[IPARAM_MB            ] = read_integer_from_options(opt, c); break;
        case 'i' : iparam[IPARAM_IB            ] = read_integer_from_options(opt, c); break;
        case 'x' : iparam[IPARAM_MX            ] = read_integer_from_options(opt, c); break;
        case 'X' : iparam[IPARAM_NX            ] = read_integer_from_options(opt, c); break;
BOUCHERIE Raphael's avatar
BOUCHERIE Raphael committed
595
        // Check/prints
BOUCHERIE Raphael's avatar
BOUCHERIE Raphael committed
596
        case '1' : iparam[IPARAM_NITER         ] = read_integer_from_options(opt, c); break;
BOUCHERIE Raphael's avatar
BOUCHERIE Raphael committed
597 598
        case 'W' : iparam[IPARAM_PRINT_WARNINGS] = 0; break;
        case 'w' : iparam[IPARAM_WARMUP        ] = 0; break;
599
        case 'c' : iparam[IPARAM_CHECK         ] = 1; break;
BOUCHERIE Raphael's avatar
BOUCHERIE Raphael committed
600
        case 'C' : iparam[IPARAM_INVERSE       ] = 1; break;
BOUCHERIE Raphael's avatar
BOUCHERIE Raphael committed
601
        // Profiling
602
        case 'T' : iparam[IPARAM_TRACE         ] = 1; break;
BOUCHERIE Raphael's avatar
BOUCHERIE Raphael committed
603
        case '2' : iparam[IPARAM_PROGRESS      ] = 1; break;
604
        case 'd' : iparam[IPARAM_DAG           ] = 1; break;
BOUCHERIE Raphael's avatar
BOUCHERIE Raphael committed
605 606
        case 'p' : iparam[IPARAM_PROFILE       ] = 1; break;
        // HQR options
BOUCHERIE Raphael's avatar
BOUCHERIE Raphael committed
607 608 609
        case 'a' : iparam[IPARAM_RHBLK         ] = read_integer_from_options(opt, c); break;
        case 'l' : iparam[IPARAM_LOWLVL_TREE   ] = read_integer_from_options(opt, c); break;
        case 'L' : iparam[IPARAM_HIGHLVL_TREE  ] = read_integer_from_options(opt, c); break;
BOUCHERIE Raphael's avatar
Minor  
BOUCHERIE Raphael committed
610
        case 'D' : iparam[IPARAM_QR_DOMINO     ] = 1; break;
BOUCHERIE Raphael's avatar
BOUCHERIE Raphael committed
611
        //Other
BOUCHERIE Raphael's avatar
BOUCHERIE Raphael committed
612
        case '9' : iparam[IPARAM_MODE          ] = read_integer_from_options(opt, c); break;
BOUCHERIE Raphael's avatar
BOUCHERIE Raphael committed
613
        case '3' : iparam[IPARAM_BIGMAT        ] = 0; break;
614 615
        case 's' : iparam[IPARAM_ASYNC         ] = 0; break;
        case 'o' : iparam[IPARAM_OOC           ] = 1; break;
BOUCHERIE Raphael's avatar
BOUCHERIE Raphael committed
616
        case 'G' : iparam[IPARAM_GEMM3M        ] = 1; break;
617
        case '4' : iparam[IPARAM_PEAK          ] = 1; break;
BOUCHERIE Raphael's avatar
BOUCHERIE Raphael committed
618
        case '5' : iparam[IPARAM_BOUND         ] = 1; break;
BOUCHERIE Raphael's avatar
BOUCHERIE Raphael committed
619 620 621
        case 'h' :
        case '?' :
            show_help(argv[0]); exit(EXIT_FAILURE);
622 623
        default:
            break;
624
        }
BOUCHERIE Raphael's avatar
Minor  
BOUCHERIE Raphael committed
625
    } while(-1 != c);
626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641
}

int
main(int argc, char *argv[]) {
    int i, m, mx, nx;
    int nbnode = 1;
    int start =  500;
    int stop  = 5000;
    int step  =  500;
    int iparam[IPARAM_SIZEOF];
    int success = 0;

    set_iparam_default(iparam);

    parse_arguments(&argc, &argv, iparam, &start, &stop, &step);

642 643 644 645 646 647 648
#if !defined(CHAMELEON_USE_CUDA)
    if (iparam[IPARAM_NCUDAS] != 0){
    	fprintf(stderr, "ERROR: CHAMELEON_USE_CUDA is not defined. "
    			"The number of CUDA devices must be set to 0 (--gpus=0).\n");
    	return EXIT_FAILURE;
    }
#endif
649 650 651 652 653 654 655 656 657

    m  = iparam[IPARAM_M];
    mx = iparam[IPARAM_MX];
    nx = iparam[IPARAM_NX];

    /* Initialize MORSE */
    MORSE_Init( iparam[IPARAM_THRDNBR],
                iparam[IPARAM_NCUDAS] );

Mathieu Faverge's avatar
Mathieu Faverge committed
658
    /* Get the number of threads set by the runtime */
Mathieu Faverge's avatar
Mathieu Faverge committed
659 660
    iparam[IPARAM_THRDNBR] = MORSE_GetThreadNbr();

661 662 663 664
    /* Stops profiling here to avoid profiling uninteresting routines.
       It will be reactivated in the time_*.c routines with the macro START_TIMING() */
    RUNTIME_stop_profiling();

665 666 667 668 669 670 671 672 673 674 675 676
    MORSE_Disable(MORSE_AUTOTUNING);
    MORSE_Set(MORSE_TILE_SIZE,        iparam[IPARAM_NB] );
    MORSE_Set(MORSE_INNER_BLOCK_SIZE, iparam[IPARAM_IB] );

    /* Householder mode */
    if (iparam[IPARAM_RHBLK] < 1) {
        MORSE_Set(MORSE_HOUSEHOLDER_MODE, MORSE_FLAT_HOUSEHOLDER);
    } else {
        MORSE_Set(MORSE_HOUSEHOLDER_MODE, MORSE_TREE_HOUSEHOLDER);
        MORSE_Set(MORSE_HOUSEHOLDER_SIZE, iparam[IPARAM_RHBLK]);
    }

Mathieu Faverge's avatar
Minor  
Mathieu Faverge committed
677
    if (iparam[IPARAM_PROFILE] == 1) {
678
        MORSE_Enable(MORSE_PROFILING_MODE);
Mathieu Faverge's avatar
Minor  
Mathieu Faverge committed
679
    }
680

Mathieu Faverge's avatar
Minor  
Mathieu Faverge committed
681
    if (iparam[IPARAM_PROGRESS] == 1) {
682
        MORSE_Enable(MORSE_PROGRESS);
Mathieu Faverge's avatar
Minor  
Mathieu Faverge committed
683
    }
684

Mathieu Faverge's avatar
Minor  
Mathieu Faverge committed
685
    if (iparam[IPARAM_PRINT_WARNINGS] == 0) {
686
        MORSE_Disable(MORSE_WARNINGS);
Mathieu Faverge's avatar
Minor  
Mathieu Faverge committed
687
    }
688

Mathieu Faverge's avatar
Minor  
Mathieu Faverge committed
689
    if (iparam[IPARAM_GEMM3M] == 1) {
690
        MORSE_Enable(MORSE_GEMM3M);
Mathieu Faverge's avatar
Minor  
Mathieu Faverge committed
691
    }
692

693
#if defined(CHAMELEON_USE_MPI)
Mathieu Faverge's avatar
Mathieu Faverge committed
694
    nbnode = MORSE_Comm_size();
695
    iparam[IPARAM_NMPI] = nbnode;
696 697 698 699 700 701 702 703 704 705 706 707 708
    /* Check P */
    if ( (iparam[IPARAM_P] > 1) &&
         (nbnode % iparam[IPARAM_P] != 0) ) {
      fprintf(stderr, "ERROR: %d doesn't divide the number of node %d\n",
              iparam[IPARAM_P], nbnode );
      return EXIT_FAILURE;
    }
#endif
    iparam[IPARAM_Q] = nbnode / iparam[IPARAM_P];

    /* Layout conversion */
    MORSE_Set(MORSE_TRANSLATION_MODE, iparam[IPARAM_INPLACE]);

Mathieu Faverge's avatar
Minor  
Mathieu Faverge committed
709
    if ( MORSE_My_Mpi_Rank() == 0 ) {
710
        print_header( argv[0], iparam);
Mathieu Faverge's avatar
Minor  
Mathieu Faverge committed
711
    }
712 713 714

    if (step < 1) step = 1;

715 716
    int status = Test( -1, iparam ); /* print header */
    if (status != MORSE_SUCCESS) return status;
717 718 719 720
    for (i = start; i <= stop; i += step)
    {
        if ( nx > 0 ) {
            iparam[IPARAM_M] = i;
721
            iparam[IPARAM_N] = chameleon_max(1, i/nx);
722
        } else if ( mx > 0 ) {
723
            iparam[IPARAM_M] = chameleon_max(1, i/mx);
724 725 726 727 728 729
            iparam[IPARAM_N] = i;
        } else {
            if ( m == -1 )
                iparam[IPARAM_M] = i;
            iparam[IPARAM_N] = i;
        }
730 731 732
        int status = Test( iparam[IPARAM_N], iparam );
        if (status != MORSE_SUCCESS) return status;
        success += status;
733 734 735 736 737 738
    }

    MORSE_Finalize();
    return success;
}