From 2f441bb8902a45a71e7abaa0854872a1bb0bad07 Mon Sep 17 00:00:00 2001
From: Samuel Thibault <samuel.thibault@ens-lyon.org>
Date: Fri, 17 Mar 2017 14:01:44 +0100
Subject: [PATCH 1/2] Adding pruning stats on top of memaccess

---
 CMakeLists.txt                             |  7 ++++
 include/runtime.h                          |  3 ++
 runtime/parsec/control/runtime_profiling.c | 10 +++++
 runtime/quark/control/runtime_profiling.c  | 10 +++++
 runtime/starpu/control/runtime_profiling.c | 21 ++++++++++
 runtime/starpu/include/morse_starpu.h      | 48 +++++++++++++++++++---
 runtime/starpu/include/runtime_profiling.h |  7 ++++
 timing/timing.h                            |  2 +
 8 files changed, 102 insertions(+), 6 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 574141293..631af51e9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -232,6 +232,13 @@ if(CHAMELEON_SCHED_STARPU)
         message("-- ${BoldGreen}CHAMELEON_SIMULATION is set to OFF, turn it ON to use"
             " SIMULATION mode (only with StarPU compiled with SimGrid)${ColourReset}")
     endif()
+    # Opt-in pruning statistics: when ON, sources are compiled with -DCHAMELEON_ENABLE_PRUNING_STATS.
+    option(CHAMELEON_ENABLE_PRUNING_STATS "Enable pruning statistics" OFF)
+    if(CHAMELEON_ENABLE_PRUNING_STATS)
+        add_definitions(-DCHAMELEON_ENABLE_PRUNING_STATS)
+    else()
+        message("-- ${BoldGreen}CHAMELEON_ENABLE_PRUNING_STATS is set to OFF, turn it ON to build pruning statistics${ColourReset}")
+    endif()
 endif()
 
 # Initially we need to generate files for different precisions
diff --git a/include/runtime.h b/include/runtime.h
index 70b18645c..ff6728d68 100644
--- a/include/runtime.h
+++ b/include/runtime.h
@@ -126,6 +126,9 @@ double RUNTIME_get_time();
 void RUNTIME_start_profiling();
 void RUNTIME_stop_profiling();
 
+void RUNTIME_start_stats(); /* StarPU: zero the pruning counters; PaRSEC/Quark: only warn */
+void RUNTIME_stop_stats();  /* StarPU: print the pruning counters to stderr; PaRSEC/Quark: only warn */
+
 #if defined(PRECISION_z)
 void RUNTIME_zdisplay_allprofile ();
 void RUNTIME_zdisplay_oneprofile (MORSE_kernel_t);
diff --git a/runtime/parsec/control/runtime_profiling.c b/runtime/parsec/control/runtime_profiling.c
index ae25f517e..1062462ce 100644
--- a/runtime/parsec/control/runtime_profiling.c
+++ b/runtime/parsec/control/runtime_profiling.c
@@ -88,6 +88,16 @@ void RUNTIME_stop_profiling()
     morse_warning("RUNTIME_stop_profiling()", "FxT profiling is not available with PaRSEC\n");
 }
 
+/* Stub: no pruning counters are kept with PaRSEC, so starting them only emits a warning. */
+void RUNTIME_start_stats() {
+    morse_warning("RUNTIME_start_stats()", "pruning stats are not available with PaRSEC\n");
+}
+
+/* Stub: nothing to report with PaRSEC, so stopping the stats only emits a warning. */
+void RUNTIME_stop_stats() {
+    morse_warning("RUNTIME_stop_stats()", "pruning stats are not available with PaRSEC\n");
+}
+
 void RUNTIME_schedprofile_display(void)
 {
     morse_warning("RUNTIME_schedprofile_display(parsec)", "Scheduler profiling is not available with PaRSEC\n");
diff --git a/runtime/quark/control/runtime_profiling.c b/runtime/quark/control/runtime_profiling.c
index 76afcc267..0db7e5513 100644
--- a/runtime/quark/control/runtime_profiling.c
+++ b/runtime/quark/control/runtime_profiling.c
@@ -103,6 +103,16 @@ void RUNTIME_stop_profiling()
     morse_warning("RUNTIME_stop_profiling()", "FxT profiling is not available with Quark\n");
 }
 
+/* Stub: no pruning counters are kept with Quark, so starting them only emits a warning. */
+void RUNTIME_start_stats() {
+    morse_warning("RUNTIME_start_stats()", "pruning stats are not available with Quark\n");
+}
+
+/* Stub: nothing to report with Quark, so stopping the stats only emits a warning. */
+void RUNTIME_stop_stats() {
+    morse_warning("RUNTIME_stop_stats()", "pruning stats are not available with Quark\n");
+}
+
 void RUNTIME_schedprofile_display(void)
 {
     morse_warning("RUNTIME_schedprofile_display(quark)", "Scheduler profiling is not available with Quark\n");
diff --git a/runtime/starpu/control/runtime_profiling.c b/runtime/starpu/control/runtime_profiling.c
index 4c7d2e1b3..33bbb4021 100644
--- a/runtime/starpu/control/runtime_profiling.c
+++ b/runtime/starpu/control/runtime_profiling.c
@@ -29,6 +29,12 @@
 #include <starpu_fxt.h>
 #endif
 
+#ifdef CHAMELEON_ENABLE_PRUNING_STATS /* pruning statistics counters, zeroed by RUNTIME_start_stats() */
+unsigned long RUNTIME_total_tasks;   /* incremented by every RUNTIME_PRUNING_STATS_END_ACCESS_DECLARATION */
+unsigned long RUNTIME_exec_tasks;    /* incremented when the task's __morse_exec flag is set */
+unsigned long RUNTIME_comm_tasks;    /* incremented when not exec but __morse_need_submit is set */
+unsigned long RUNTIME_changed_tasks; /* incremented when neither of the above but __morse_changed is set */
+#endif
 
 double RUNTIME_get_time(){
     return starpu_timing_now()*1e-6;
@@ -50,6 +56,21 @@ void RUNTIME_stop_profiling(){
 #endif
 }
 
+/* Zero every pruning counter; compiles to a no-op without CHAMELEON_ENABLE_PRUNING_STATS. */
+void RUNTIME_start_stats(){
+#ifdef CHAMELEON_ENABLE_PRUNING_STATS
+    RUNTIME_total_tasks = RUNTIME_exec_tasks = 0;
+    RUNTIME_comm_tasks = RUNTIME_changed_tasks = 0;
+#endif
+}
+
+/* Print the pruning counters to stderr; %lu is required because they are unsigned long. */
+void RUNTIME_stop_stats(){
+#ifdef CHAMELEON_ENABLE_PRUNING_STATS
+    fprintf(stderr, "\ntasks: %lu = exec: %lu + comm: %lu + changed: %lu\n", RUNTIME_total_tasks, RUNTIME_exec_tasks, RUNTIME_comm_tasks, RUNTIME_changed_tasks);
+#endif
+}
+
 void RUNTIME_profiling_display_info(const char *kernel_name, measure_t perf[STARPU_NMAXWORKERS])
 {
     int header = 1;
diff --git a/runtime/starpu/include/morse_starpu.h b/runtime/starpu/include/morse_starpu.h
index 473e68aa8..bee6ca006 100644
--- a/runtime/starpu/include/morse_starpu.h
+++ b/runtime/starpu/include/morse_starpu.h
@@ -86,16 +86,52 @@ typedef struct starpu_conf starpu_conf_t;
 
 void RUNTIME_set_reduction_methods(starpu_data_handle_t handle, MORSE_enum dtyp);
 
-#define RUNTIME_BEGIN_ACCESS_DECLARATION
+#ifdef CHAMELEON_ENABLE_PRUNING_STATS
+/* Per-task flags: __morse_exec (set by write accesses or a rank change), __morse_changed (rank was forced). */
+#define RUNTIME_PRUNING_STATS_BEGIN_ACCESS_DECLARATION \
+    int __morse_exec = 0; \
+    int __morse_changed = 0;
+/* Write access: flag the task as executed here when morse_desc_islocal(A, Am, An) holds. */
+#define RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An) \
+    if (morse_desc_islocal(A, Am, An)) \
+        __morse_exec = 1;
+/* Count the task, then classify it: exec, else comm (__morse_need_submit, declared elsewhere), else changed. */
+#define RUNTIME_PRUNING_STATS_END_ACCESS_DECLARATION \
+    RUNTIME_total_tasks++; \
+    if (__morse_exec) \
+        RUNTIME_exec_tasks++; \
+    else if (__morse_need_submit) \
+        RUNTIME_comm_tasks++; \
+    else if (__morse_changed) \
+        RUNTIME_changed_tasks++;
+/* Forced rank: exec becomes (rank == own rank). Declares a local, so expand at most once per scope. */
+#define RUNTIME_PRUNING_STATS_RANK_CHANGED(rank) \
+    int __morse_myrank; \
+    RUNTIME_comm_rank(&__morse_myrank); \
+    __morse_exec = (rank) == __morse_myrank; \
+    __morse_changed = 1;
 
-#define RUNTIME_ACCESS_R(A, Am, An)
+#else
+#define RUNTIME_PRUNING_STATS_BEGIN_ACCESS_DECLARATION
+#define RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An)
+#define RUNTIME_PRUNING_STATS_END_ACCESS_DECLARATION
+#endif
+
+#define RUNTIME_BEGIN_ACCESS_DECLARATION \
+    RUNTIME_PRUNING_STATS_BEGIN_ACCESS_DECLARATION
+
+#define RUNTIME_ACCESS_R(A, Am, An) /* expands to nothing: read accesses do not feed the pruning statistics */
 
-#define RUNTIME_ACCESS_W(A, Am, An)
+#define RUNTIME_ACCESS_W(A, Am, An) \
+    RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An)
 
-#define RUNTIME_ACCESS_RW(A, Am, An)
+#define RUNTIME_ACCESS_RW(A, Am, An) \
+    RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An)
 
-#define RUNTIME_RANK_CHANGED(rank)
+#define RUNTIME_RANK_CHANGED(rank) \
+    RUNTIME_PRUNING_STATS_RANK_CHANGED(rank)
 
-#define RUNTIME_END_ACCESS_DECLARATION
+#define RUNTIME_END_ACCESS_DECLARATION \
+    RUNTIME_PRUNING_STATS_END_ACCESS_DECLARATION;
 
 #endif /* _MORSE_STARPU_H_ */
diff --git a/runtime/starpu/include/runtime_profiling.h b/runtime/starpu/include/runtime_profiling.h
index 4e712053d..0d25ee23f 100644
--- a/runtime/starpu/include/runtime_profiling.h
+++ b/runtime/starpu/include/runtime_profiling.h
@@ -26,6 +26,13 @@
 #ifndef _PROFILING_H_
 #define _PROFILING_H_
 
+#ifdef CHAMELEON_ENABLE_PRUNING_STATS /* counters defined in runtime/starpu/control/runtime_profiling.c */
+extern unsigned long RUNTIME_total_tasks;   /* every task passing through RUNTIME_END_ACCESS_DECLARATION */
+extern unsigned long RUNTIME_exec_tasks;    /* tasks whose __morse_exec flag was set */
+extern unsigned long RUNTIME_comm_tasks;    /* non-exec tasks with __morse_need_submit set */
+extern unsigned long RUNTIME_changed_tasks; /* remaining tasks with __morse_changed set */
+#endif
+
 typedef struct measure_s {
     double sum;
     double sum2;
diff --git a/timing/timing.h b/timing/timing.h
index 08190507a..3d33748a6 100644
--- a/timing/timing.h
+++ b/timing/timing.h
@@ -159,6 +159,7 @@ enum dparam_timing {
  *
  */
 #define START_TRACING()                        \
+    RUNTIME_start_stats();                     \
     if(iparam[IPARAM_TRACE] == 2) {            \
     	RUNTIME_start_profiling();             \
     }                                          \
@@ -167,6 +168,7 @@ enum dparam_timing {
     }
 
 #define STOP_TRACING()                         \
+    RUNTIME_stop_stats();                      \
     if(iparam[IPARAM_TRACE] == 2) {            \
     	RUNTIME_stop_profiling();              \
     }                                          \
-- 
GitLab


From 169b568783e1963c8310f890d98f8b45610249ec Mon Sep 17 00:00:00 2001
From: Samuel Thibault <samuel.thibault@ens-lyon.org>
Date: Mon, 20 Mar 2017 15:08:01 +0100
Subject: [PATCH 2/2] fix build without pruning stats

---
 runtime/starpu/include/morse_starpu.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/runtime/starpu/include/morse_starpu.h b/runtime/starpu/include/morse_starpu.h
index bee6ca006..987d179f0 100644
--- a/runtime/starpu/include/morse_starpu.h
+++ b/runtime/starpu/include/morse_starpu.h
@@ -115,6 +115,7 @@ void RUNTIME_set_reduction_methods(starpu_data_handle_t handle, MORSE_enum dtyp)
 #define RUNTIME_PRUNING_STATS_BEGIN_ACCESS_DECLARATION
 #define RUNTIME_PRUNING_STATS_ACCESS_W(A, Am, An)
 #define RUNTIME_PRUNING_STATS_END_ACCESS_DECLARATION
+#define RUNTIME_PRUNING_STATS_RANK_CHANGED(rank)
 #endif
 
 #define RUNTIME_BEGIN_ACCESS_DECLARATION \
-- 
GitLab