From d772fa027de4f5c355b3762293f6e5cc981ccba6 Mon Sep 17 00:00:00 2001
From: Valentin Trophime <valentin.trophime@inria.fr>
Date: Wed, 23 Oct 2024 15:43:35 +0200
Subject: [PATCH] update scripts to get cdf function

---
 examples/overhead.rs     |  33 +++++----
 scripts/heatmap.py       | 146 ++++++++++++++++++++++++++++++---------
 scripts/requirements.txt |  17 +++++
 3 files changed, 148 insertions(+), 48 deletions(-)
 create mode 100644 scripts/requirements.txt

diff --git a/examples/overhead.rs b/examples/overhead.rs
index 06df51a..5a06c1e 100644
--- a/examples/overhead.rs
+++ b/examples/overhead.rs
@@ -42,23 +42,26 @@ struct Args {
     reactivity_us: u64,
     #[arg(long)]
     n_iter: u32,
+    #[arg(long, default_value_t = 1)]
+    repeat: u32,
+    #[arg(long, default_value_t = false)]
+    output_ns: bool,
 }
 
 fn main() {
     let args = Args::parse();
-    let time_ref = measure_busy_wait(args);
-    let _ = embassy_futures::block_on(measure_iter(args));
-    let time_iter = embassy_futures::block_on(measure_iter(args));
-    let duration = std::time::Duration::from_micros(args.duration_task);
-    println!(
-        "Baseline: {} busy_sleep of {:?} took {:?}",
-        args.n_iter, duration, time_ref
-    );
-    println!(
-        "Preemptive {} iters total time {:?}",
-        args.n_iter, time_iter
-    );
-    let baseline = time_ref.as_secs_f64();
-    let overhead = (time_iter.as_secs_f64() - baseline) / baseline;
-    println!("Preemptive iterator overhead {:.4}%", overhead * 100.0);
+    for _ in 0..args.repeat {
+        let time_ref = measure_busy_wait(args);
+        let time_iter = embassy_futures::block_on(measure_iter(args));
+        if args.output_ns {
+            println!("Baseline: {}", time_ref.as_nanos());
+            println!("Preemptive: {}", time_iter.as_nanos());
+        } else {
+            println!("Baseline: {:?}", time_ref);
+            println!("Preemptive: {:?}", time_iter);
+            let baseline = time_ref.as_secs_f64();
+            let overhead = (time_iter.as_secs_f64() - baseline) / baseline;
+            println!("Preemptive iterator overhead {:.4}%", overhead * 100.0);
+        }
+    }
 }
diff --git a/scripts/heatmap.py b/scripts/heatmap.py
index ac05d78..40cd3d8 100755
--- a/scripts/heatmap.py
+++ b/scripts/heatmap.py
@@ -3,22 +3,26 @@
 import argparse
 import subprocess
 import itertools
+from bisect import bisect_left
 import os
 import re
 import pandas
+
+import scipy
 import matplotlib.pyplot as plt
 import numpy as np
 
 PARAMETERS = {
-    "REACTIVITY": [10],
+    "Reactivity": [10],
     "DurationTask": [10, 100, 1_000],
     "N_Iter": [100, 1_000, 10_000],
 }
-CSV_COLS = ["REACTIVITY", "DurationTask", "N_Iter", "Overhead"]
+CSV_COLS = ["Reactivity", "DurationTask", "N_Iter", "Baseline", "Preemptive"]
 N_REPEAT = 100
 
 EXEC = "target/release/examples/overhead"
-REGEX = re.compile(r"Preemptive iterator overhead (-?\d+\.\d+)")
+REGEX_BASELINE = re.compile(r"Baseline: (\d+)")
+REGEX_PREEMPT = re.compile(r"Preemptive: (\d+)")
 
 CONFIDENCE_INTERVAL = 0.95
 
@@ -37,7 +41,7 @@ STATS = ["min", "max", "median", "mean", "std", percent_low, percent_up]
 GATHER_STATS = {"Overhead": STATS}
 
 
-def collect_heatmap(out_csv: str):
+def collect_heatmap(out_csv: str, repeat: int):
     if os.path.exists(out_csv):
         user_input = input("Remove old csv (y/n) ?")
         if user_input.startswith("y"):
@@ -48,31 +52,34 @@ def collect_heatmap(out_csv: str):
     with open(out_csv, "w") as f:
         f.write(",".join(CSV_COLS) + "\n")
         for reac, dur, niter in itertools.product(*PARAMETERS.values()):
-            for _ in range(N_REPEAT):
-                out = subprocess.run(
-                    [
-                        EXEC,
-                        "--duration-task",
-                        str(dur),
-                        "--reactivity-us",
-                        str(reac),
-                        "--n-iter",
-                        str(niter),
-                    ],
-                    check=True,
-                    text=True,
-                    stdout=subprocess.PIPE,
-                )
-                assert out.stdout
-                capture = re.search(REGEX, out.stdout)
-                assert capture
-                overhead = capture.group(1)
-                f.write(f"{reac},{dur},{niter},{overhead}\n")
-                print(f"{reac},{dur},{niter},{overhead}")
+            out = subprocess.run(
+                [
+                    EXEC,
+                    "--duration-task",
+                    str(dur),
+                    "--reactivity-us",
+                    str(reac),
+                    "--n-iter",
+                    str(niter),
+                    "--repeat",
+                    str(repeat),
+                    "--output-ns",
+                ],
+                check=True,
+                text=True,
+                stdout=subprocess.PIPE,
+            )
+            assert out.stdout
+            baselines = re.finditer(REGEX_BASELINE, out.stdout)
+            premptives = re.finditer(REGEX_PREEMPT, out.stdout)
+            for cb, cp in zip(baselines, premptives):
+                f.write(f"{reac},{dur},{niter},{cb.group(1)},{cp.group(1)}\n")
+            print(f"{reac=} {dur=} {niter=} done")
 
 
 def plot_heatmap(path: str):
     df = pandas.read_csv(path)
+    df["Overhead"] = (df["Preemptive"] - df["Baseline"]) / df["Baseline"] * 100.0
     print(df)
     df = df.groupby(list(PARAMETERS)).agg(GATHER_STATS).reset_index()
     print(df)
@@ -111,24 +118,97 @@ def plot_heatmap(path: str):
     ax.set_xlabel("Duration of each iteration in approx e-7 secs.")
     ax.set_ylabel("Number of iterations.")
     ax.set_title(
-        f"Overhead in % of the iteration using preemptive iter vs native iter. (reactivity = {PARAMETERS['REACTIVITY'][0]} Î¼s)"
+        f"Overhead in % of the iteration using preemptive iter vs native iter. (reactivity = {PARAMETERS['Reactivity'][0]} Î¼s)"
     )
     plt.show()
 
 
+def plot_likelyhood(path: str, n_iter: int, task_dur: int, reac: int):
+    print(f"{task_dur=} {n_iter=} {reac=}")
+    df = pandas.read_csv(path)
+    df_filtered = df[
+        (df["N_Iter"] == n_iter)
+        & (df["DurationTask"] == task_dur)
+        & (df["Reactivity"] == reac)
+    ]
+    assert len(df_filtered) > 0
+    baselines = df_filtered["Baseline"].values
+    preemptives = df_filtered["Preemptive"].values
+    overhead2 = np.zeros(len(baselines) * len(preemptives))
+    i = 0
+    for base in baselines:
+        for t in preemptives:
+            overhead2[i] = (t - base) / base * 100.0
+            i += 1
+    overhead2.sort()
+    estimated = scipy.stats.ecdf(overhead2)
+    x = np.linspace(-5, 100, 10000)
+    y = estimated.cdf.evaluate(x)
+    plt.plot(x, y, label="ecdf")
+    cdf = scipy.stats.norm.cdf(overhead2)
+    plt.plot(overhead2, cdf, label="cdf")
+    plt.xlabel("x in %")
+    plt.xlim([-5, 50])
+    plt.ylabel("P(X < x)")
+    plt.legend()
+    plt.title("Probability than the real overhead is below x")
+    x_95 = x[bisect_left(y, 0.95)]
+    x_99 = x[bisect_left(y, 0.99)]
+    print(f"P(Overhead < x) < 95% for x = {x_95}")
+    print(f"P(Overhead < x) < 99% for x = {x_99}")
+    plt.show()
+
+
 def main():
     parser = argparse.ArgumentParser(
         formatter_class=argparse.ArgumentDefaultsHelpFormatter
     )
-    parser.add_argument("--collect-csv", help="File to generate.")
-    parser.add_argument("--plot-csv", help="File to user for plot.")
-    args = parser.parse_args()
+    subparsers = parser.add_subparsers(dest="subparser_name")
+    parser_collect = subparsers.add_parser(
+        "collect", formatter_class=argparse.ArgumentDefaultsHelpFormatter
+    )
+    parser_collect.add_argument("output_csv", help="File to generate.")
+    parser_collect.add_argument(
+        "--repeat",
+        type=int,
+        help="Number of times to repeat each run.",
+        default=N_REPEAT,
+    )
 
-    if args.plot_csv:
-        plot_heatmap(args.plot_csv)
+    parser_plot = subparsers.add_parser("plot")
+    parser_plot.add_argument("input_csv", help="File to use for plot.")
+
+    parser_likelyhood = subparsers.add_parser("likelyhood")
+    parser_likelyhood.add_argument("input_csv", help="File to use for plot.")
+    parser_likelyhood.add_argument(
+        "--n-iter",
+        help="Value of N_Iter to fix.",
+        type=int,
+        default=PARAMETERS["N_Iter"][0],
+    )
+    parser_likelyhood.add_argument(
+        "--task-dur",
+        help="Value of DurationTask to fix.",
+        type=int,
+        default=PARAMETERS["DurationTask"][0],
+    )
+    parser_likelyhood.add_argument(
+        "--reac",
+        help="Value of Reactivity to fix.",
+        type=int,
+        default=PARAMETERS["Reactivity"][0],
+    )
+    args = parser.parse_args()
 
-    if args.collect_csv:
-        collect_heatmap(args.collect_csv)
+    match args.subparser_name:
+        case "collect":
+            collect_heatmap(args.output_csv, args.repeat)
+        case "plot":
+            plot_heatmap(args.input_csv)
+        case "likelyhood":
+            plot_likelyhood(args.input_csv, args.n_iter, args.task_dur, args.reac)
+        case other:
+            print(f"Unknown command: {other}")
 
 
 if __name__ == "__main__":
diff --git a/scripts/requirements.txt b/scripts/requirements.txt
new file mode 100644
index 0000000..823c63c
--- /dev/null
+++ b/scripts/requirements.txt
@@ -0,0 +1,17 @@
+contourpy==1.3.0
+cycler==0.12.1
+fonttools==4.54.1
+kiwisolver==1.4.7
+matplotlib==3.9.2
+numpy==2.1.1
+packaging==24.1
+pandas==2.2.3
+pillow==10.4.0
+pyparsing==3.1.4
+python-dateutil==2.9.0.post0
+pytz==2024.2
+scipy==1.14.1
+seaborn==0.13.2
+six==1.16.0
+tabulate==0.9.0
+tzdata==2024.2
-- 
GitLab