diff --git a/.gitignore b/.gitignore
index c7da63d8c5e11a21a72a0350acf471c53bc80ecc..8a485b3fed026eaa193539fe5a0e48ee530ebaeb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,8 @@
 /target
 pyrightconfig.json
 perf.data*
-.python-version # pyenv local generated file
+
+# pyenv local generated file
+.python-version
+
+*.csv
diff --git a/examples/overhead.rs b/examples/overhead.rs
index 17627e257a903eea1e049c5565659ab61de78638..06df51a228c34490f7e402c1a25185c12baee71d 100644
--- a/examples/overhead.rs
+++ b/examples/overhead.rs
@@ -1,28 +1,46 @@
-use std::time::Instant;
+#![feature(core_intrinsics)]
 
 use clap::Parser;
 use embassy_time::Duration;
 use preemptive_iterator::PreemptiveIterator;
+use std::time::Instant;
 
 async fn measure_iter(args: Args) -> std::time::Duration {
-    let dur = std::time::Duration::from_micros(args.duration_us);
     let reactivity = Duration::from_micros(args.reactivity_us);
     let start = Instant::now();
     (0..args.n_iter)
-        .preemptive_for_each(
-            |_| {
-                busy_wait(dur);
-            },
-            reactivity,
-        )
+        .preemptive_for_each(|_| work(args.duration_task), reactivity)
         .await;
     start.elapsed()
 }
 
+fn measure_busy_wait(args: Args) -> std::time::Duration {
+    let start = Instant::now();
+    for _ in 0..args.n_iter {
+        std::hint::black_box(work(args.duration_task));
+    }
+    start.elapsed()
+}
+
+fn work(task_duration: u64) {
+    let mut res = 0.0;
+    let mut i = 0;
+    loop {
+        res += (i as f64) * (i as f64);
+        i += 1;
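+        // Exit check kept off the hot path via the nightly `unlikely` intrinsic
+        // (hence `#![feature(core_intrinsics)]` above); assumes task_duration >= 1.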
+        if std::intrinsics::unlikely(i == task_duration) {
+            break;
+        }
+    }
+    std::hint::black_box(res);
+}
+
 #[derive(Parser, Debug, Clone, Copy)]
 struct Args {
     #[arg(long)]
-    duration_us: u64,
+    duration_task: u64,
     #[arg(long)]
     reactivity_us: u64,
     #[arg(long)]
@@ -31,9 +49,11 @@ struct Args {
 
 fn main() {
     let args = Args::parse();
-    let time_iter = embassy_futures::block_on(measure_iter(args));
     let time_ref = measure_busy_wait(args);
-    let duration = std::time::Duration::from_micros(args.duration_us);
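+    // Warm-up run, discarded, so the timed run is not skewed by cold caches or lazy init.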
+    let _ = embassy_futures::block_on(measure_iter(args));
+    let time_iter = embassy_futures::block_on(measure_iter(args));
+    let duration = std::time::Duration::from_micros(args.duration_task);
     println!(
         "Baseline: {} busy_sleep of {:?} took {:?}",
         args.n_iter, duration, time_ref
@@ -46,23 +66,3 @@ fn main() {
     let overhead = (time_iter.as_secs_f64() - baseline) / baseline;
     println!("Preemptive iterator overhead {:.4}%", overhead * 100.0);
 }
-
-fn measure_busy_wait(args: Args) -> std::time::Duration {
-    let duration = std::time::Duration::from_micros(args.duration_us);
-    let start = Instant::now();
-    for _ in 0..args.n_iter {
-        std::hint::black_box(busy_wait(duration));
-    }
-    start.elapsed()
-}
-
-fn busy_wait(duration: std::time::Duration) {
-    let start = Instant::now();
-    let mut counter = 0u64;
-    // Busy loop until the desired duration has passed
-    while start.elapsed() < duration {
-        counter += 1;
-    }
-    // Use the counter somehow to prevent it from being optimized out
-    std::hint::black_box(counter);
-}
diff --git a/scripts/callgrind.py b/scripts/callgrind.py
new file mode 100644
index 0000000000000000000000000000000000000000..29ef3ac07becb7a15265a47f58c3af2f1a5450d6
--- /dev/null
+++ b/scripts/callgrind.py
@@ -0,0 +1,175 @@
+# perf script event handlers, generated by perf script -g python
+# (c) 2016, Milian Wolff <milian.wolff@kdab.com>
+# (c) 2019, Lubos Lunak <l.lunak@kde.org>
+# Licensed under the terms of the GNU GPL License version 2
+#
+# This script converts perf data into the callgrind format.
+# The output can then be visualized in kcachegrind.
+#
+# Usage: perf script -s scripts/callgrind.py > callgrind.out
+#
+# NOTE: This script currently does not support conversion of data files
+#       that contain multiple event sources.
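+#
+# A typical end-to-end flow (paths are illustrative; adjust to your setup):
+#   perf record --call-graph dwarf ./target/release/examples/overhead \
+#       --duration-task 100 --reactivity-us 10 --n-iter 1000
+#   perf script -s scripts/callgrind.py > callgrind.out
+#   kcachegrind callgrind.out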
+
+import os
+import sys
+import subprocess
+from collections import defaultdict
+from subprocess import PIPE
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+    '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from Core import *
+from perf_trace_context import *
+
+try:
+  from subprocess import DEVNULL # py3k
+except ImportError:
+  import os
+  DEVNULL = open(os.devnull, 'wb')
+
+class Cost:
+  def __init__(self):
+    self.cost = 0
+    self.calls = 0
+
+  def add(self, cost):
+    self.cost += cost
+    self.calls += 1
+
+class FileInfo:
+  def __init__(self, file, line):
+    self.file = file
+    self.line = line
+
+class Function:
+  def __init__(self, dsoName, name, sym):
+    self.cost = Cost()
+    self.calls = 0
+    self.dso = dsoName
+    self.name = name
+    self.sym = sym
+    self.fileInfo = FileInfo("???", 0)
+
+    self.callees = defaultdict(lambda: Cost())
+
+class DSO:
+  def __init__(self):
+    self.functions = dict()
+    self.name = ""
+
+  def createFileInfo(self):
+    addresses = ""
+    for sym, function in self.functions.items():
+      try:
+        addresses += hex(function.sym['start']) + "\n"
+      except:
+        addresses += "\n"
+    process = subprocess.Popen(["addr2line", "-e", self.name], stdin=PIPE, stdout=PIPE, stderr=DEVNULL, universal_newlines=True)
+    output = process.communicate(input=addresses)[0].split('\n')
+    pos = 0
+    for sym, function in self.functions.items():
+      try:
+        addressInfo = output[pos].split(':')
+        file = addressInfo[0]
+      except:
+        file = None
+      if not function.sym or not file or file == "??":
+        file = "???"
+      try:
+        line = int(addressInfo[1])
+      except:
+        line = 0
+      function.fileInfo = FileInfo(file, line)
+      pos = pos + 1
+
+# a map of all encountered dso's and the functions therein
+# this is done to prevent name clashes
+dsos = defaultdict(lambda: DSO())
+
+def addFunction(dsoName, name, sym):
+  global dsos
+  dso = dsos[dsoName]
+  if not dso.name:
+      dso.name = dsoName
+  function = dso.functions.get(name, None)
+  # create function if it's not yet known
+  if not function:
+    function = Function(dsoName, name, sym)
+    dso.functions[name] = function
+  return function
+
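+# default cost unit; trace_unhandled switches this to nanoseconds when samples carry a period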
+eventsType = "events: Samples"
+
+# write the callgrind file format to stdout
+def trace_end():
+  global dsos
+
+  print("version: 1")
+  print("creator: perf-callgrind 0.1")
+  print("part: 1")
+  # TODO: get access to command line, it's in the perf data header
+  #       but not accessible to the scripting backend, is it?
+  print(eventsType)
+
+  for dsoName, dso in dsos.items():
+    dso.createFileInfo()
+
+  for dsoName, dso in dsos.items():
+    print("ob=%s" % dsoName)
+    for sym, function in dso.functions.items():
+      print("fl=%s" % function.fileInfo.file)
+      print("fn=%s" % sym)
+      print("%d %d" % (function.fileInfo.line, function.cost.cost))
+      for callee, cost in function.callees.items():
+        print("cob=%s" % callee.dso)
+        print("cfi=%s" % callee.fileInfo.file)
+        print("cfn=%s" % callee.name)
+        print("calls=%d %d" % (cost.calls, callee.fileInfo.line))
+        print("%d %d" % (function.fileInfo.line, cost.cost))
+      print("")
+
+def addSample(event, cost, callchain):
+  caller = None
+  if not callchain:
+    # only add the single symbol where we got the sample, without a backtrace
+    dsoName = event.get("dso", "???")
+    name = event.get("symbol", "???")
+    caller = addFunction(dsoName, name, None)
+  else:
+    # add a function for every frame in the callchain
+    for item in reversed(callchain):
+      dsoName = item.get("dso", "???")
+      name = "???"
+      if "sym" in item:
+        name = item["sym"]["name"]
+      function = addFunction(dsoName, name, item.get("sym", None))
+      # add current frame to parent's callee list
+      if caller is not None:
+        caller.callees[function].add(cost)
+      caller = function
+
+  # increase the self cost of the last frame
+  # all other frames include it now and kcachegrind will automatically
+  # take care of adapting their inclusive cost
+  if caller is not None:
+    caller.cost.add(cost)
+
+def process_event(event):
+  addSample(event, 1, event["callchain"])
+
+def trace_unhandled(event_name, context, sample, event):
+  global eventsType
+  cost = 1
+  if sample["period"] > 0:
+    cost = sample["period"]
+    eventsType = "event: ns: time in ns\nevents: ns"
+  addSample(event, cost, event['common_callchain'])
diff --git a/scripts/heatmap.py b/scripts/heatmap.py
new file mode 100755
index 0000000000000000000000000000000000000000..ac05d784930638e22ccd799cf43ed04cb01aa661
--- /dev/null
+++ b/scripts/heatmap.py
@@ -0,0 +1,145 @@
+#! /usr/bin/env python3
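+"""Collect and plot an overhead heatmap for the preemptive iterator.
+
+Build the example binary first (cargo build --release --example overhead),
+then, for example:
+  ./scripts/heatmap.py --collect-csv heatmap.csv   # run the benchmark grid
+  ./scripts/heatmap.py --plot-csv heatmap.csv      # plot from the csv
+"""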
+
+import argparse
+import subprocess
+import itertools
+import os
+import re
+import pandas
+import matplotlib.pyplot as plt
+import numpy as np
+
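+# Benchmark grid: every (reactivity, task duration, n_iter) combination below
+# is run N_REPEAT times so that confidence intervals can be reported.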
+PARAMETERS = {
+    "REACTIVITY": [10],
+    "DurationTask": [10, 100, 1_000],
+    "N_Iter": [100, 1_000, 10_000],
+}
+CSV_COLS = ["REACTIVITY", "DurationTask", "N_Iter", "Overhead"]
+N_REPEAT = 100
+
+EXEC = "target/release/examples/overhead"
+REGEX = re.compile(r"Preemptive iterator overhead (-?\d+\.\d+)")
+
+CONFIDENCE_INTERVAL = 0.95
+
+
+def percent_up(series: pandas.Series) -> np.float64:
+    # 0.95 + 0.05/2 = 0.975: leave 2.5% of the mass above the upper bound
+    return series.quantile(CONFIDENCE_INTERVAL + (1 - CONFIDENCE_INTERVAL) / 2)
+
+
+def percent_low(series: pandas.Series) -> np.float64:
+    # 1 - 0.95 - 0.05/2 = 0.025: leave 2.5% of the mass below the lower bound
+    return series.quantile(1 - CONFIDENCE_INTERVAL - (1 - CONFIDENCE_INTERVAL) / 2)
+
+
+STATS = ["min", "max", "median", "mean", "std", percent_low, percent_up]
+GATHER_STATS = {"Overhead": STATS}
+
+
+def collect_heatmap(out_csv: str):
+    if os.path.exists(out_csv):
+        user_input = input("Remove old csv (y/n)? ")
+        if user_input.startswith("y"):
+            os.remove(out_csv)
+        else:
+            print("Do nothing")
+            return
+    with open(out_csv, "w") as f:
+        f.write(",".join(CSV_COLS) + "\n")
+        for reac, dur, niter in itertools.product(*PARAMETERS.values()):
+            for _ in range(N_REPEAT):
+                out = subprocess.run(
+                    [
+                        EXEC,
+                        "--duration-task",
+                        str(dur),
+                        "--reactivity-us",
+                        str(reac),
+                        "--n-iter",
+                        str(niter),
+                    ],
+                    check=True,
+                    text=True,
+                    stdout=subprocess.PIPE,
+                )
+                assert out.stdout
+                capture = re.search(REGEX, out.stdout)
+                assert capture
+                overhead = capture.group(1)
+                f.write(f"{reac},{dur},{niter},{overhead}\n")
+                print(f"{reac},{dur},{niter},{overhead}")
+
+
+def plot_heatmap(path: str):
+    df = pandas.read_csv(path)
+    print(df)
+    df = df.groupby(list(PARAMETERS)).agg(GATHER_STATS).reset_index()
+    print(df)
+    fig, ax = plt.subplots()
+    ax.set_xticks(
+        np.arange(len(PARAMETERS["DurationTask"])),
+        [str(d) for d in PARAMETERS["DurationTask"]],
+    )
+    ax.set_yticks(
+        np.arange(len(PARAMETERS["N_Iter"])),
+        [str(d) for d in PARAMETERS["N_Iter"]],
+    )
+
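+    # Each cell shows the midpoint of the 95% confidence interval; the ± value is its half-width.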
+    data = np.zeros((len(PARAMETERS["N_Iter"]), len(PARAMETERS["DurationTask"])))
+    for i, niter in enumerate(PARAMETERS["N_Iter"]):
+        for j, dur in enumerate(PARAMETERS["DurationTask"]):
+            condition = (df["N_Iter"] == niter) & (df["DurationTask"] == dur)
+            overhead = df[condition]["Overhead"]
+            print(overhead)
+            lb = np.float64(overhead["percent_low"].iloc[0])
+            ub = np.float64(overhead["percent_up"].iloc[0])
+            mid = (lb + ub) / 2
+            err = ub - mid
+            data[i, j] = mid
+            ax.text(
+                j,
+                i,
+                f"{data[i,j]:.2f}±{err:.2f}%",
+                ha="center",
+                va="center",
+                color="w",
+            )
+
+    ax.imshow(data)
+    fig.tight_layout()
+    ax.set_xlabel("Duration of each iteration (approx. 1e-7 s units).")
+    ax.set_ylabel("Number of iterations.")
+    ax.set_title(
+        f"Overhead in % of the iteration using preemptive iter vs native iter. (reactivity = {PARAMETERS['REACTIVITY'][0]} μs)"
+    )
+    plt.show()
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter
+    )
+    parser.add_argument("--collect-csv", help="File to generate.")
+    parser.add_argument("--plot-csv", help="File to user for plot.")
+    args = parser.parse_args()
+
+    if args.plot_csv:
+        plot_heatmap(args.plot_csv)
+
+    if args.collect_csv:
+        collect_heatmap(args.collect_csv)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/plot_heatmap.py b/scripts/plot_heatmap.py
new file mode 100644
index 0000000000000000000000000000000000000000..135b5b6265e4a88d92c7c97bc7ae76c141f3e033
--- /dev/null
+++ b/scripts/plot_heatmap.py
@@ -0,0 +1,117 @@
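+"""Plot an overhead heatmap from a previously collected CSV.
+
+The CSV is expected to provide the columns Reactivity, N_Iter, DurationTask,
+BaselineTime and IterTime; the Overhead column is derived below as
+(IterTime - BaselineTime) / BaselineTime * 100.
+"""
+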
+import argparse
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas
+from functools import reduce
+
+
+DURATION_KEY = "DurationTask"
+REACTIVITY_KEY = "Reactivity"
+ITER_KEY = "N_Iter"
+BASELINE_KEY = "BaselineTime"
+OUR_KEY = "IterTime"
+OVERHEAD_KEY = "Overhead"
+MEAN_KEY = "mean"
+
+CONFIDENCE_INTERVAL = 0.95
+
+
+def percent_up(series: pandas.Series) -> np.float64:
+    # 0.95 + 0.05/2 = 0.975: leave 2.5% of the mass above the upper bound
+    return series.quantile(CONFIDENCE_INTERVAL + (1 - CONFIDENCE_INTERVAL) / 2)
+
+
+def percent_low(series: pandas.Series) -> np.float64:
+    # 1 - 0.95 - 0.05/2 = 0.025: leave 2.5% of the mass below the lower bound
+    return series.quantile(1 - CONFIDENCE_INTERVAL - (1 - CONFIDENCE_INTERVAL) / 2)
+
+
+PARAMETERS = [REACTIVITY_KEY, ITER_KEY, DURATION_KEY]
+REACTIVITY_IDX = PARAMETERS.index(REACTIVITY_KEY)
+ITER_IDX = PARAMETERS.index(ITER_KEY)
+DURATION_IDX = PARAMETERS.index(DURATION_KEY)
+STATS = ["min", "max", "median", "mean", "std", percent_low, percent_up]
+GATHER_STATS = {OUR_KEY: STATS, BASELINE_KEY: STATS, OVERHEAD_KEY: STATS}
+
+
+def show_distribution(reac: int, n_iter: int, duration: int, df: pandas.DataFrame):
+    rows = df[
+        (df[REACTIVITY_KEY] == reac)
+        & (df[ITER_KEY] == n_iter)
+        & (df[DURATION_KEY] == duration)
+    ]
+    plt.hist(rows[OVERHEAD_KEY].values, bins=300)
+    plt.show()
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("heatmap_csv_file", help="path to csv file with heatmap data")
+    parser.add_argument(
+        "--show-distrib",
+        action="store_true",
+        help="Show distribution of the Worst case.",
+    )
+    args = parser.parse_args()
+    df = pandas.read_csv(args.heatmap_csv_file)
+    print(df)
+    # create column for overhead
+    df[OVERHEAD_KEY] = (df[OUR_KEY] - df[BASELINE_KEY]) / df[BASELINE_KEY] * 100.0
+
+    if args.show_distrib:
+        show_distribution(10, 1000, 10, df)
+
+    # aggregate stats over all rows that share the same (reactivity, n_iter, duration) tuple
+    df = df.groupby(PARAMETERS).agg(GATHER_STATS).reset_index()
+
+    uniques = [df[key].unique() for key in PARAMETERS]
+    shape = [len(uniq) for uniq in uniques]
+    print(f"Generating a {shape[DURATION_IDX]} x {shape[ITER_IDX]} HeatMap")
+    fig, ax = plt.subplots()
+    ax.set_xticks(
+        np.arange(shape[DURATION_IDX]), [str(d) for d in uniques[DURATION_IDX]]
+    )
+    ax.set_yticks(np.arange(shape[ITER_IDX]), [str(i) for i in uniques[ITER_IDX]])
+
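+    # Reactivity is pinned to its first unique value; the map spans (n_iter, duration).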
+    data = np.zeros(shape[1:])
+    for i in range(shape[ITER_IDX]):
+        for j in range(shape[DURATION_IDX]):
+            indices = (0, i, j)
+            pairs = (df[k] == u[idx] for k, u, idx in zip(PARAMETERS, uniques, indices))
+            condition = reduce(lambda a, b: a & b, pairs)
+            print(df[condition][OVERHEAD_KEY])
+            overhead = df[condition][OVERHEAD_KEY]
+            lb = np.float64(overhead["percent_low"].iloc[0])
+            ub = np.float64(overhead["percent_up"].iloc[0])
+            mid = (lb + ub) / 2
+            err = ub - mid
+            data[i, j] = mid
+            ax.text(
+                j,
+                i,
+                f"{data[i,j]:.2f}±{err:.2f}%",
+                ha="center",
+                va="center",
+                color="w",
+            )
+
+    ax.imshow(data)
+    fig.tight_layout()
+    ax.set_xlabel("Duration of each iteration (approx. 1e-7 s units).")
+    ax.set_ylabel("Number of iterations.")
+    ax.set_title(
+        f"Overhead in % of the iteration using preemptive iter vs native iter. (reactivity = {uniques[REACTIVITY_IDX][0]} μs)"
+    )
+    plt.show()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/setup_bench.py b/scripts/setup_bench.py
new file mode 100755
index 0000000000000000000000000000000000000000..d64bd052c424866373631c4613ec1c5d215cb06c
--- /dev/null
+++ b/scripts/setup_bench.py
@@ -0,0 +1,26 @@
+#!/usr/bin/env python3
+
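+# Prepare the machine for stable benchmarking: set the "performance" cpufreq
+# governor on every CPU and disable turbo boost. Writes to sysfs, so it must
+# run as root; the no_turbo knob assumes an Intel CPU using intel_pstate.
+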
+import os
+
+
+def main():
+    nb_threads = os.sysconf("SC_NPROCESSORS_CONF")
+    print(f"Setting maximum frequency to all {nb_threads=} Cores.")
+    for i in range(nb_threads):
+        path = f"/sys/devices/system/cpu/cpu{i}/cpufreq/scaling_governor"
+        with open(path, "w") as scaling_gov:
+            scaling_gov.write("performance")
+    print("Successfully set max frequency to all CPUs")
+
+    print("Disabling turbo")
+    with open("/sys/devices/system/cpu/intel_pstate/no_turbo", "w") as no_turbo:
+        no_turbo.write("1")
+    print("Successfully disabled turbo boost")
+
+
+if __name__ == "__main__":
+    main()