From 2bf49014c5f706c7412f15457a49b256138ea17d Mon Sep 17 00:00:00 2001
From: Mathieu Faverge <mathieu.faverge@inria.fr>
Date: Tue, 13 Oct 2020 18:50:55 +0200
Subject: [PATCH] Separate kernel profiling from tracing, and add a profile
 option to the testing to display the kernel profile

---
 control/context.c             |  8 ++++++--
 include/chameleon/constants.h | 21 +++++++++++----------
 testing/chameleon_ztesting.c  | 14 +++++++++++++-
 3 files changed, 30 insertions(+), 13 deletions(-)

diff --git a/control/context.c b/control/context.c
index 358f28ea9..1960909e7 100644
--- a/control/context.c
+++ b/control/context.c
@@ -147,9 +147,11 @@ int CHAMELEON_Enable(int option)
             chamctxt->autotuning_enabled = CHAMELEON_TRUE;
             break;
         case CHAMELEON_PROFILING_MODE:
-            chamctxt->profiling_enabled = CHAMELEON_TRUE;
             RUNTIME_start_profiling();
             break;
+        case CHAMELEON_KERNELPROFILE_MODE:
+            chamctxt->profiling_enabled = CHAMELEON_TRUE;
+            break;
         case CHAMELEON_PROGRESS:
             chamctxt->progress_enabled = CHAMELEON_TRUE;
             break;
@@ -218,9 +220,11 @@ int CHAMELEON_Disable(int option)
             chamctxt->autotuning_enabled = CHAMELEON_FALSE;
             break;
         case CHAMELEON_PROFILING_MODE:
-            chamctxt->profiling_enabled = CHAMELEON_FALSE;
             RUNTIME_stop_profiling();
             break;
+        case CHAMELEON_KERNELPROFILE_MODE:
+            chamctxt->profiling_enabled = CHAMELEON_FALSE;
+            break;
         case CHAMELEON_PROGRESS:
             chamctxt->progress_enabled = CHAMELEON_FALSE;
             break;
diff --git a/include/chameleon/constants.h b/include/chameleon/constants.h
index fce41282c..ebf21af77 100644
--- a/include/chameleon/constants.h
+++ b/include/chameleon/constants.h
@@ -196,16 +196,17 @@ typedef enum chameleon_store_e {
 /**
  *  State machine switches
  */
-#define CHAMELEON_WARNINGS        1
-#define CHAMELEON_ERRORS          2
-#define CHAMELEON_AUTOTUNING      3
-#define CHAMELEON_DAG             4
-#define CHAMELEON_PROFILING_MODE  5
-#define CHAMELEON_PARALLEL_MODE   6
-#define CHAMELEON_BOUND           7
-#define CHAMELEON_PROGRESS        8
-#define CHAMELEON_GEMM3M          9
-#define CHAMELEON_GENERIC        10
+#define CHAMELEON_WARNINGS            1
+#define CHAMELEON_ERRORS              2
+#define CHAMELEON_AUTOTUNING          3
+#define CHAMELEON_DAG                 4
+#define CHAMELEON_PROFILING_MODE      5  /* starts/stops the runtime profiling (trace) */
+#define CHAMELEON_PARALLEL_MODE       6
+#define CHAMELEON_BOUND               7
+#define CHAMELEON_PROGRESS            8
+#define CHAMELEON_GEMM3M              9
+#define CHAMELEON_GENERIC            10
+#define CHAMELEON_KERNELPROFILE_MODE 11  /* toggles the kernel profile; appended so values 1-10 keep their meaning */
 
 /**
  *  CHAMELEON constants - configuration parameters
diff --git a/testing/chameleon_ztesting.c b/testing/chameleon_ztesting.c
index 82e396920..e535b6ff7 100644
--- a/testing/chameleon_ztesting.c
+++ b/testing/chameleon_ztesting.c
@@ -41,6 +41,7 @@ static parameter_t parameters[] = {
     { "trace",    "Enable the trace generation",              -30, PARAM_OPTION, 0, 0, TestValInt, {0}, NULL, pread_int, sprint_int },
     { "nowarmup", "Disable the warmup run to load libraries", -31, PARAM_OPTION, 0, 0, TestValInt, {0}, NULL, pread_int, sprint_int },
     { "mtxfmt",   "Change the way the matrix is stored (0: global, 1: tiles, 2: OOC)", -32, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 1, 6, TestValInt, {0}, NULL, pread_int, sprint_int },
+    { "profile",  "Display the kernel profiling",             -33, PARAM_OPTION, 0, 0, TestValInt, {0}, NULL, pread_int, sprint_int },
 
     { NULL, "Machine parameters", 0, PARAM_OPTION, 0, 0, 0, {0}, NULL, NULL, NULL },
     { "threads", "Number of CPU workers per node",      't', PARAM_OPTION | PARAM_OUTPUT, 1, 7, TestValInt, {-1}, NULL, pread_int, sprint_int },
@@ -489,7 +490,7 @@ parameters_destroy()
 int main (int argc, char **argv) {
 
     int ncores, ngpus, human, check, i, niter;
-    int trace, nowarmup;
+    int trace, nowarmup, profile;
     int rc, info = 0;
     int run_id = 0;
     char *func_name;
@@ -513,6 +514,7 @@ int main (int argc, char **argv) {
     niter     = parameters_getvalue_int( "niter"    );
     trace     = parameters_getvalue_int( "trace"    );
     nowarmup  = parameters_getvalue_int( "nowarmup" );
+    profile   = parameters_getvalue_int( "profile" );
 
     CHAMELEON_Init( ncores, ngpus );
 
@@ -541,6 +543,11 @@ int main (int argc, char **argv) {
         run_arg_list_destroy( &copy );
     }
 
+    /* Start kernel statistics */
+    if ( profile ) {
+        CHAMELEON_Enable( CHAMELEON_KERNELPROFILE_MODE );
+    }
+
     /* Start tracing */
     if ( trace ) {
         CHAMELEON_Enable( CHAMELEON_PROFILING_MODE );
@@ -573,6 +580,11 @@ int main (int argc, char **argv) {
         CHAMELEON_Disable( CHAMELEON_PROFILING_MODE );
     }
 
+    /* Stop kernel statistics and display results */
+    if ( profile ) {
+        CHAMELEON_Disable( CHAMELEON_KERNELPROFILE_MODE );
+        RUNTIME_kernelprofile_display();
+    }
     free( runlist );
 
     CHAMELEON_Finalize();
-- 
GitLab