Merge branch 'issue40' into 'master'

Cleanup the timing parameters and their documentation Closes #40 See merge request !83

Merge branch 'issue40' into 'master'
Cleanup the timing parameters and their documentation Closes #40 See merge request !83
bd9b8f15 · Mathieu Faverge · b7ab8c09 · d1c68fb1 · bd9b8f15 · bd9b8f15
Commit bd9b8f15 authored 7 years ago by Mathieu Faverge
--- a/doc/orgmode/chapters/using.org
+++ b/doc/orgmode/chapters/using.org
@@ -165,33 +165,58 @@
     #+end_src

     List of main options that can be used in timing:
-     * ~--help~: show usage
-     * ~--threads~: Number of CPU workers (default:
-       ~_SC_NPROCESSORS_ONLN~)
-     * ~--gpus~: number of GPU workers (default: ~0~)
-     * ~--n_range=R~: range of N values, with ~R=Start:Stop:Step~
-       (default: ~500:5000:500~)
-     * ~--m=X~: dimension (M) of the matrices (default: ~N~)
-     * ~--k=X~: dimension (K) of the matrices (default: ~1~), useful for
-       GEMM algorithm (k is the shared dimension and must be defined
-       >1 to consider matrices and not vectors)
-     * ~--nrhs=X~: number of right-hand size (default: ~1~)
-     * ~--nb=X~: block/tile size. (default: ~128~)
-     * ~--ib=X~: inner-blocking/IB size. (default: ~32~)
-     * ~--niter=X~: number of iterations performed for each test
-       (default: ~1~)
-     * ~--rhblk=X~: if X > 0, enable Householder mode for QR and LQ
-       factorization. X is the size of each subdomain (default: ~0~)
-     * ~--[no]check~: check result (default: ~nocheck~)
-     * ~--[no]profile~: print profiling informations (default:
-       ~noprofile~)
-     * ~--[no]trace~: enable/disable trace generation (default: ~notrace~)
-     * ~--[no]dag~: enable/disable DAG generation (default: ~nodag~)
-     * ~--[no]inv~: check on inverse (default: ~noinv~)
-     * ~--nocpu~: all GPU kernels are exclusively executed on GPUs
-     * ~--ooc~: Enable out-of-core (available only with StarPU)
-     * ~--bound~: Compare result to area bound (available only with
-       StarPU) (default: ~0~)
+     * ~--help~: Show usage
+     * Machine parameters
+       * ~-t x, --threads=x~: Number of CPU workers (default: automatic
+         detection through runtime)
+       * ~-g x, --gpus=x~: Number of GPU workers (default: ~0~)
+       * ~-P x, --P=x~: Rows (P) in the PxQ process grid (default: ~1~)
+       * ~--nocpu~: All GPU kernels are exclusively executed on GPUs
+     * Matrix parameters
+       * ~-m x, --m=x, --M=x~: Dimension (M) of the matrices (default:
+         ~N~)
+       * ~-n x, --n=x, --N=x~: Dimension (N) of the matrices
+       * ~-N R, --n_range=R~: Range of N values to time with
+         ~R=Start:Stop:Step~ (default: ~500:5000:500~)
+       * ~-k x, --k=x, --K=x, --nrhs=x~: Dimension (K) of the matrices
+         or number of right-hand sides (default: ~1~). This is useful for
+         GEMM algorithms (k is the shared dimension and must be defined
+         >1 to consider matrices and not vectors)
+       * ~-b x, --nb=x~: NB size. (default: ~320~)
+       * ~-i x, --ib=x~: IB size. (default: ~32~)
+     * Check/prints
+       * ~--niter=X~: Number of iterations performed for each test
+         (default: ~1~)
+       * ~-W, --nowarning~: Do not show warnings
+       * ~-w, --nowarmup~: Cancel the warmup run to pre-load libraries
+       * ~-c, --check~: Check result
+       * ~-C, --inv~: Check on inverse
+       * ~--mode=x~ : Change the xLATMS matrix mode generation for
+         SVD/EVD (default: ~4~). It must be between 0 and 20 included.
+     * Profiling parameters
+       * ~-T, --trace~: Enable trace generation
+       * ~--progress~: Display progress indicator
+       * ~-d, --dag~: Enable DAG generation. Generates a dot_dag_file.dot.
+       * ~-p, --profile~: Print profiling information
+     * HQR parameters
+       * ~-a x, --qr_a=x, --rhblk=x~: Define the size of the local TS
+         trees in Householder reduction trees for QR and LQ
+         factorization. x is the size of each subdomain (default: ~-1~)
+       * ~-l x, --llvl=x~: Tree used for low level reduction inside
+         nodes (default: ~-1~)
+       * ~-L x, --hlvl=x~: Tree used for high level reduction between
+         nodes, only if P > 1 (default: ~-1~). Possible values are -1:
+         Automatic, 0: Flat, 1: Greedy, 2: Fibonacci, 3: Binary, 4:
+         Replicated greedy.
+       * ~-D, --domino~: Enable the domino between upper and lower trees
+     * Advanced options
+       * ~--nobigmat~: Disable single large matrix allocation for
+         multiple tiled allocations
+       * ~-s, --sync~: Enable synchronous calls in wrapper function such
+         as POTRI
+       * ~-o, --ooc~: Enable out-of-core (available only with StarPU)
+       * ~-G, --gemm3m~: Use gemm3m complex method
+       * ~--bound~: Compare result to area bound

     List of timing algorithms available:
     * LANGE: norms of matrices

--- a/doc/texinfo/chapters/using.texi
+++ b/doc/texinfo/chapters/using.texi
@@ -83,36 +83,53 @@ copy from LAPACK matrix layout to tile matrix layout are necessary.
  List of main options that can be used in timing:
  @itemize @bullet
    @item @option{--help}: show usage
-    @item @option{--threads}: Number of CPU workers (default:
-@option{_SC_NPROCESSORS_ONLN})
-    @item @option{--gpus}: number of GPU workers (default: @option{0})
-    @item @option{--n_range=R}: range of N values, with
-@option{R=Start:Stop:Step}
-(default: @option{500:5000:500})
-    @item @option{--m=X}: dimension (M) of the matrices (default: @option{N})
-    @item @option{--k=X}: dimension (K) of the matrices (default: @option{1}),
-useful for GEMM algorithm (k is the shared dimension and must be defined >1 to
-consider matrices and not vectors)
-    @item @option{--nrhs=X}: number of right-hand size (default: @option{1})
-    @item @option{--nb=X}: block/tile size. (default: @option{128})
-    @item @option{--ib=X}: inner-blocking/IB size. (default: @option{32})
-    @item @option{--niter=X}: number of iterations performed for each test
-(default: @option{1})
-    @item @option{--rhblk=X}: if X > 0, enable Householder mode for QR and LQ
-factorization. X is the size of each subdomain (default: @option{0})
-    @item @option{--[no]check}: check result (default: @option{nocheck})
-    @item @option{--[no]profile}: print profiling informations (default:
-@option{noprofile})
-    @item @option{--[no]trace}: enable/disable trace generation (default:
-@option{notrace})
-    @item @option{--[no]dag}: enable/disable DAG generation (default:
-@option{nodag})
-    @item @option{--[no]inv}: check on inverse (default: @option{noinv})
-    @item @option{--nocpu}: all GPU kernels are exclusively executed on GPUs
-    @item @option{--ooc}: Enable out-of-core (available only with StarPU)
-    @item @option{--bound}: Compare result to area bound (available only with StarPU)
-(default: @option{0})
-  @end itemize
+    @item Machine parameters
+    @itemize @bullet
+       @item @option{-t x, --threads=x}: Number of CPU workers (default: automatic detection through runtime)
+       @item @option{-g x, --gpus=x}: Number of GPU workers (default: @option{0})
+       @item @option{-P x, --P=x}: Rows (P) in the PxQ process grid (default: @option{1})
+       @item @option{--nocpu}: All GPU kernels are exclusively executed on GPUs (default: @option{0})
+    @end itemize
+    @item Matrix parameters
+    @itemize @bullet
+      @item @option{-m x, --m=x, --M=x}: Dimension (M) of the matrices (default: @option{N})
+      @item @option{-n x, --n=x, --N=x}: Dimension (N) of the matrices
+      @item @option{-N R, --n_range=R}: Range of N values to time with R=Start:Stop:Step (default: @option{500:5000:500})
+      @item @option{-k x, --k=x, --K=x, --nrhs=x}: Dimension (K) of the matrices or number of right-hand sides (default: @option{1}). This is useful for GEMM-like algorithms (k is the shared dimension and must be defined >1 to consider matrices and not vectors)
+      @item @option{-b x, --nb=x}: NB size. (default: @option{320})
+      @item @option{-i x, --ib=x}: IB size. (default: @option{32})
+    @end itemize
+    @item Check/prints
+    @itemize @bullet
+      @item @option{--niter=x}: number of iterations performed for each test (default: @option{1})
+      @item @option{-W, --nowarnings}: Do not show warnings
+      @item @option{-w, --nowarmup}: Cancel the warmup run to pre-load libraries
+      @item @option{-c, --check}: Check result
+      @item @option{-C, --inv}: Check on inverse
+      @item @option{--mode=x}: Change the xLATMS matrix mode generation for SVD/EVD (default: @option{4}). It must be between 0 and 20 included.
+    @end itemize
+    @item Profiling parameters
+    @itemize @bullet
+      @item @option{-T, --trace}: Enable trace generation
+      @item @option{--progress}: Display progress indicator
+      @item @option{-d, --dag}: Enable DAG generation. Generates a dot_dag_file.dot.
+      @item @option{-p, --profile}: Print profiling information
+    @end itemize
+    @item HQR parameters
+    @itemize @bullet
+      @item @option{-a x, --qr_a=x, --rhblk=x}: Define the size of the local TS trees in Householder reduction trees for QR and LQ factorization. x is the size of each subdomain (default: @option{-1})
+      @item @option{-l x, --llvl=x}: Tree used for low level reduction inside nodes (default: @option{-1})
+      @item @option{-L x, --hlvl=x}: Tree used for high level reduction between nodes, only if P > 1 (default: @option{-1}). Possible values are -1: Automatic, 0: Flat, 1: Greedy, 2: Fibonacci, 3: Binary, 4: Replicated greedy.
+      @item @option{-D, --domino}: Enable the domino between upper and lower trees
+    @end itemize
+    @item Advanced options
+    @itemize @bullet
+      @item @option{--nobigmat}: Disable single large matrix allocation for multiple tiled allocations
+      @item @option{-s, --sync}: Enable synchronous calls in wrapper function such as POTRI
+      @item @option{-o, --ooc}: Enable out-of-core (available only with StarPU)
+      @item @option{-G, --gemm3m}: Use gemm3m complex method
+      @item @option{--bound}: Compare result to area bound
+    @end itemize

  List of timing algorithms available:
  @itemize @bullet

--- a/example/basic_zposv/basic_posv.h
+++ b/example/basic_zposv/basic_posv.h
@@ -117,7 +117,6 @@ enum iparam_examples {
    /* Added for StarPU version */
    IPARAM_PROFILE,
    IPARAM_PRINT_ERRORS,
-    IPARAM_PEAK,
    IPARAM_PARALLEL_TASKS,
    IPARAM_NO_CPU,
    IPARAM_BOUND,
@@ -132,7 +131,6 @@ enum dparam_examples {
  IPARAM_XNORM,
  IPARAM_RNORM,
  IPARAM_AinvNORM,
-  IPARAM_ESTIMATED_PEAK,
  IPARAM_RES,
  /* Begin section for hydra integration tool */
  IPARAM_THRESHOLD_CHECK, /* Maximum value accepted for: |Ax-b||/N/eps/(||A||||x||+||b||) */
@@ -179,7 +177,6 @@ static void init_iparam(int iparam[IPARAM_SIZEOF]){
    iparam[IPARAM_Q             ] = 1;
    iparam[IPARAM_PROFILE       ] = 0;
    iparam[IPARAM_PRINT_ERRORS  ] = 0;
-    iparam[IPARAM_PEAK          ] = 0;
    iparam[IPARAM_PARALLEL_TASKS] = 0;
    iparam[IPARAM_NO_CPU        ] = 0;
    iparam[IPARAM_BOUND         ] = 0;
@@ -210,7 +207,6 @@ static void print_header(char *prog_name, int * iparam) {
    const char *bound_header   = iparam[IPARAM_BOUND]   ? "   thGflop/s" : "";
    const char *check_header   = iparam[IPARAM_CHECK]   ? "     ||Ax-b||       ||A||       ||x||       ||b|| ||Ax-b||/N/eps/(||A||||x||+||b||)  RETURN" : "";
    const char *inverse_header = iparam[IPARAM_INVERSE] ? " ||I-A*Ainv||       ||A||    ||Ainv||       ||Id - A*Ainv||/((||A|| ||Ainv||).N.eps)" : "";
-    const char *peak_header    = iparam[IPARAM_PEAK]    ? "  (% of peak)  peak" : "";
 #if defined(CHAMELEON_SIMULATION)
    double    eps = 0.;
 #else
@@ -235,8 +231,8 @@ static void print_header(char *prog_name, int * iparam) {
            iparam[IPARAM_IB],
            eps );

-    printf( "#     M       N  K/NRHS   seconds   Gflop/s Deviation%s%s%s\n",
-            bound_header, peak_header, iparam[IPARAM_INVERSE] ? inverse_header : check_header);
+    printf( "#     M       N  K/NRHS   seconds   Gflop/s Deviation%s%s\n",
+            bound_header, iparam[IPARAM_INVERSE] ? inverse_header : check_header);
    printf( "# %5.0d   %5.0d   %5.0d\n", iparam[IPARAM_N], iparam[IPARAM_N], iparam[IPARAM_K]);
    return;
 }

--- a/runtime/starpu/include/runtime_codelet_profile.h
+++ b/runtime/starpu/include/runtime_codelet_profile.h
@@ -67,53 +67,11 @@
                    }                                                                          \
            }                                                                                  \
    }                                                                                          \
-    void estimate_##name##_sustained_peak(double *res_peak)                                    \
-    {                                                                                          \
-        /* We use a heuristic where we assume that all GPUs have some work, and                \
-         * that some CPUs may not have been involved. This may not be                          \
-         * applicable to small problems where only a subset of the processing                  \
-         * units are used. We assume that all CPUs are the same, so we multiply                \
-         * the best performance obtained on a CPU by the number of CPUs, and we                \
-         * add this to the sum of the performance obtained by the GPUs. */                     \
-        double peak = 0.0;                                                                     \
-        double best_cpu = 0.0;                                                                 \
-        unsigned ncpus = 0;                                                                    \
-                                                                                               \
-        unsigned worker;                                                                       \
-        for (worker = 0; worker < starpu_worker_get_count(); worker++)                         \
-            {                                                                                  \
-                unsigned cpu_worker = (starpu_worker_get_type(worker) == STARPU_CPU_WORKER);   \
-                if (cpu_worker)                                                                \
-                    ncpus++;                                                                   \
-                                                                                               \
-                if (name##_perf[worker].n > 0)                                                 \
-                    {                                                                          \
-                        long   n    = name##_perf[worker].n;                                   \
-                        double sum  = name##_perf[worker].sum;                                 \
-                        double avg = sum / n;                                                  \
-                                                                                               \
-                        if (cpu_worker)                                                        \
-                            {                                                                  \
-                                if (avg > best_cpu)                                            \
-                                    best_cpu = avg;                                            \
-                            }                                                                  \
-                        else                                                                   \
-                            {                                                                  \
-                                peak += avg;                                                   \
-                            }                                                                  \
-                    }                                                                          \
-            }                                                                                  \
-                                                                                               \
-        peak += ncpus * best_cpu;                                                              \
-                                                                                               \
-        *res_peak = peak;                                                                      \
-    }                                                                                          \

 #define CHAMELEON_CL_CB_HEADER(name)                        \
    extern struct starpu_perfmodel*cl_##name##_save;    \
    extern struct starpu_perfmodel cl_##name##_fake;    \
    void cl_##name##_callback();                        \
-    void profiling_display_##name##_info(void);         \
-    void estimate_##name##_sustained_peak(double *res)
+    void profiling_display_##name##_info(void);

 #endif /* __CODELET_PROFILE_H__ */
--- a/timing/timing.c
+++ b/timing/timing.c
@@ -221,34 +221,30 @@ Test(int64_t n, int *iparam) {
    if ( MORSE_My_Mpi_Rank() == 0) {
        printf( "%9.3f %9.2f +-%7.2f  ", sumt/niter, gflops, sd);

-        if (iparam[IPARAM_BOUND])
+        if (iparam[IPARAM_BOUND]) {
            printf(" %9.2f",  sumgf_upper/niter);
-
-        if ( iparam[IPARAM_PEAK] )
-        {
-            if (dparam[IPARAM_ESTIMATED_PEAK]<0.0f)
-                printf("  n/a    n/a   ");
-            else
-                printf("  %5.2f%%  %9.2f ", 100.0f*(gflops/dparam[IPARAM_ESTIMATED_PEAK]), dparam[IPARAM_ESTIMATED_PEAK]);
        }

        if ( iparam[IPARAM_CHECK] ){
            hres = ( dparam[IPARAM_RES] / n / eps / (dparam[IPARAM_ANORM] * dparam[IPARAM_XNORM] + dparam[IPARAM_BNORM] ) > dparam[IPARAM_THRESHOLD_CHECK] );

-            if (hres)
+            if (hres) {
                printf( "%8.5e %8.5e %8.5e %8.5e                       %8.5e FAILURE",
                    dparam[IPARAM_RES], dparam[IPARAM_ANORM], dparam[IPARAM_XNORM], dparam[IPARAM_BNORM],
                    dparam[IPARAM_RES] / n / eps / (dparam[IPARAM_ANORM] * dparam[IPARAM_XNORM] + dparam[IPARAM_BNORM] ));
-            else
+            }
+            else {
                printf( "%8.5e %8.5e %8.5e %8.5e                       %8.5e SUCCESS",
                    dparam[IPARAM_RES], dparam[IPARAM_ANORM], dparam[IPARAM_XNORM], dparam[IPARAM_BNORM],
                    dparam[IPARAM_RES] / n / eps / (dparam[IPARAM_ANORM] * dparam[IPARAM_XNORM] + dparam[IPARAM_BNORM] ));
+            }
        }

-        if ( iparam[IPARAM_INVERSE] )
+        if ( iparam[IPARAM_INVERSE] ) {
            printf( " %8.5e %8.5e %8.5e     %8.5e",
                    dparam[IPARAM_RNORM], dparam[IPARAM_ANORM], dparam[IPARAM_AinvNORM],
                    dparam[IPARAM_RNORM] /((dparam[IPARAM_ANORM] * dparam[IPARAM_AinvNORM])*n*eps));
+        }

        printf("\n");

@@ -387,7 +383,6 @@ show_help(char *prog_name) {
            "    -s, --sync             Enable synchronous calls in wrapper function such as POTRI\n"
            "    -o, --ooc              Enable out-of-core (available only with StarPU)\n"
            "    -G, --gemm3m           Use gemm3m complex method\n"
-            //"        --peak             ?\n"todo
            "        --bound            Compare result to area bound\n"
            "\n");
 }
@@ -398,7 +393,6 @@ print_header(char *prog_name, int * iparam) {
    const char *bound_header   = iparam[IPARAM_BOUND]   ? "   thGflop/s" : "";
    const char *check_header   = iparam[IPARAM_CHECK]   ? "     ||Ax-b||       ||A||       ||x||       ||b|| ||Ax-b||/N/eps/(||A||||x||+||b||)  RETURN" : "";
    const char *inverse_header = iparam[IPARAM_INVERSE] ? " ||I-A*Ainv||       ||A||    ||Ainv||       ||Id - A*Ainv||/((||A|| ||Ainv||).N.eps)" : "";
-    const char *peak_header    = iparam[IPARAM_PEAK]    ? "  (% of peak)  peak" : "";
 #if defined(CHAMELEON_SIMULATION)
    _PREC    eps = 0.;
 #else
@@ -431,8 +425,8 @@ print_header(char *prog_name, int * iparam) {
            iparam[IPARAM_IB],
            eps );

-    printf( "#     M       N  K/NRHS   seconds   Gflop/s Deviation%s%s%s\n",
-            bound_header, peak_header, iparam[IPARAM_INVERSE] ? inverse_header : check_header);
+    printf( "#     M       N  K/NRHS   seconds   Gflop/s Deviation%s%s\n",
+            bound_header, iparam[IPARAM_INVERSE] ? inverse_header : check_header);
    return;
 }

@@ -482,7 +476,6 @@ static struct option long_options[] =
    {"sync",          no_argument,       0,      's'},
    {"ooc",           no_argument,       0,      'o'},
    {"gemm3m",        no_argument,       0,      'G'},
-    {"peak",          no_argument,       0,      '4'},
    {"bound",         no_argument,       0,      '5'},
    {0, 0, 0, 0}
 };
@@ -496,7 +489,7 @@ set_iparam_default(int *iparam){
    iparam[IPARAM_THRDNBR       ] = -1;
    iparam[IPARAM_THRDNBR_SUBGRP] = 1;
    iparam[IPARAM_M             ] = -1;
-    iparam[IPARAM_N             ] = 500;
+    iparam[IPARAM_N             ] = -1;
    iparam[IPARAM_K             ] = 1;
    iparam[IPARAM_LDA           ] = -1;
    iparam[IPARAM_LDB           ] = -1;
@@ -611,7 +604,6 @@ parse_arguments(int *_argc, char ***_argv, int *iparam, int *start, int *stop, i
        case 's' : iparam[IPARAM_ASYNC         ] = 0; break;
        case 'o' : iparam[IPARAM_OOC           ] = 1; break;
        case 'G' : iparam[IPARAM_GEMM3M        ] = 1; break;
-        case '4' : iparam[IPARAM_PEAK          ] = 1; break;
        case '5' : iparam[IPARAM_BOUND         ] = 1; break;
        case 'h' :
        case '?' :
@@ -624,7 +616,8 @@ parse_arguments(int *_argc, char ***_argv, int *iparam, int *start, int *stop, i

 int
 main(int argc, char *argv[]) {
-    int i, m, mx, nx;
+    int i, m, n, mx, nx;
+    int status;
    int nbnode = 1;
    int start =  500;
    int stop  = 5000;
@@ -644,6 +637,7 @@ main(int argc, char *argv[]) {
    }
 #endif

+    n  = iparam[IPARAM_N];
    m  = iparam[IPARAM_M];
    mx = iparam[IPARAM_MX];
    nx = iparam[IPARAM_NX];
@@ -709,26 +703,41 @@ main(int argc, char *argv[]) {

    if (step < 1) step = 1;

-    int status = Test( -1, iparam ); /* print header */
+    status = Test( -1, iparam ); /* print header */
    if (status != MORSE_SUCCESS) return status;
-    for (i = start; i <= stop; i += step)
-    {
-        if ( nx > 0 ) {
-            iparam[IPARAM_M] = i;
-            iparam[IPARAM_N] = chameleon_max(1, i/nx);
-        } else if ( mx > 0 ) {
-            iparam[IPARAM_M] = chameleon_max(1, i/mx);
-            iparam[IPARAM_N] = i;
-        } else {
-            if ( m == -1 )
+    if ( n == -1 ){
+        for (i = start; i <= stop; i += step)
+        {
+            if ( nx > 0 ) {
                iparam[IPARAM_M] = i;
-            iparam[IPARAM_N] = i;
+                iparam[IPARAM_N] = chameleon_max(1, i/nx);
+            }
+            else if ( mx > 0 ) {
+                iparam[IPARAM_M] = chameleon_max(1, i/mx);
+                iparam[IPARAM_N] = i;
+            }
+            else {
+                if ( m == -1 ) {
+                    iparam[IPARAM_M] = i;
+                }
+                iparam[IPARAM_N] = i;
+            }
+            status = Test( iparam[IPARAM_N], iparam );
+            if (status != MORSE_SUCCESS) {
+                return status;
+            }
+            success += status;
+        }
+    }
+    else {
+        if ( m == -1 ) {
+            iparam[IPARAM_M] = n;
        }
-        int status = Test( iparam[IPARAM_N], iparam );
+        iparam[IPARAM_N] = n;
+        status = Test( iparam[IPARAM_N], iparam );
        if (status != MORSE_SUCCESS) return status;
        success += status;
    }
-
    MORSE_Finalize();
    return success;
 }

--- a/timing/timing.h
+++ b/timing/timing.h
@@ -57,7 +57,6 @@ enum iparam_timing {
    /* Added for StarPU version */
    IPARAM_PROFILE,
    IPARAM_PRINT_WARNINGS,
-    IPARAM_PEAK,
    IPARAM_PARALLEL_TASKS,
    IPARAM_NO_CPU,
    IPARAM_BOUND,
@@ -80,7 +79,6 @@ enum dparam_timing {
  IPARAM_XNORM,
  IPARAM_RNORM,
  IPARAM_AinvNORM,
-  IPARAM_ESTIMATED_PEAK,
  IPARAM_RES,
  /* Begin section for hydra integration tool */
  IPARAM_THRESHOLD_CHECK, /* Maximum value accepted for: |Ax-b||/N/eps/(||A||||x||+||b||) */