diff --git a/.gitignore b/.gitignore
index c7da63d8c5e11a21a72a0350acf471c53bc80ecc..8a485b3fed026eaa193539fe5a0e48ee530ebaeb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,8 @@
 /target
 pyrightconfig.json
 perf.data*
-.python-version # pyenv local generated file
+
+# pyenv local generated file
+.python-version
+
+*.csv
diff --git a/examples/overhead.rs b/examples/overhead.rs
index 17627e257a903eea1e049c5565659ab61de78638..06df51a228c34490f7e402c1a25185c12baee71d 100644
--- a/examples/overhead.rs
+++ b/examples/overhead.rs
@@ -1,28 +1,43 @@
-use std::time::Instant;
+#![feature(core_intrinsics)]
 
 use clap::Parser;
 use embassy_time::Duration;
 use preemptive_iterator::PreemptiveIterator;
+use std::time::Instant;
 
 async fn measure_iter(args: Args) -> std::time::Duration {
-    let dur = std::time::Duration::from_micros(args.duration_us);
     let reactivity = Duration::from_micros(args.reactivity_us);
     let start = Instant::now();
     (0..args.n_iter)
-        .preemptive_for_each(
-            |_| {
-                busy_wait(dur);
-            },
-            reactivity,
-        )
+        .preemptive_for_each(|_| work(args.duration_task), reactivity)
         .await;
     start.elapsed()
 }
 
+fn measure_busy_wait(args: Args) -> std::time::Duration {
+    let start = Instant::now();
+    for _ in 0..args.n_iter {
+        std::hint::black_box(work(args.duration_task));
+    }
+    start.elapsed()
+}
+
+fn work(task_duration: u64) {
+    let mut res = 0.0;
+    let mut i = 0;
+    loop {
+        res += (i as f64) * (i as f64);
+        i += 1;
+        if std::intrinsics::unlikely(i == task_duration) {
+            break;
+        }
+    }
+    std::hint::black_box(res);
+}
 #[derive(Parser, Debug, Clone, Copy)]
 struct Args {
     #[arg(long)]
-    duration_us: u64,
+    duration_task: u64,
     #[arg(long)]
     reactivity_us: u64,
     #[arg(long)]
@@ -31,9 +46,10 @@ struct Args {
 
 fn main() {
     let args = Args::parse();
-    let time_iter = embassy_futures::block_on(measure_iter(args));
     let time_ref = measure_busy_wait(args);
-    let duration = std::time::Duration::from_micros(args.duration_us);
+    let _ = embassy_futures::block_on(measure_iter(args));
+    let time_iter = embassy_futures::block_on(measure_iter(args));
+    let duration = std::time::Duration::from_micros(args.duration_task);
     println!(
         "Baseline: {} busy_sleep of {:?} took {:?}",
         args.n_iter, duration, time_ref
@@ -46,23 +62,3 @@ fn main() {
     let overhead = (time_iter.as_secs_f64() - baseline) / baseline;
     println!("Preemptive iterator overhead {:.4}%", overhead * 100.0);
 }
-
-fn measure_busy_wait(args: Args) -> std::time::Duration {
-    let duration = std::time::Duration::from_micros(args.duration_us);
-    let start = Instant::now();
-    for _ in 0..args.n_iter {
-        std::hint::black_box(busy_wait(duration));
-    }
-    start.elapsed()
-}
-
-fn busy_wait(duration: std::time::Duration) {
-    let start = Instant::now();
-    let mut counter = 0u64;
-    // Busy loop until the desired duration has passed
-    while start.elapsed() < duration {
-        counter += 1;
-    }
-    // Use the counter somehow to prevent it from being optimized out
-    std::hint::black_box(counter);
-}
diff --git a/scripts/callgrind.py b/scripts/callgrind.py
new file mode 100644
index 0000000000000000000000000000000000000000..29ef3ac07becb7a15265a47f58c3af2f1a5450d6
--- /dev/null
+++ b/scripts/callgrind.py
@@ -0,0 +1,170 @@
+# perf script event handlers, generated by perf script -g python
+# (c) 2016, Milian Wolff <milian.wolff@kdab.com>
+# (c) 2019, Lubos Lunak <l.lunak@kde.org>
+# Licensed under the terms of the GNU GPL License version 2
+#
+# This script converts perf data into the callgrind format.
+# The output can then be visualized in kcachegrind.
+#
+# Usage: perf script -s perf2calltree.py > perf.out
+#
+# NOTE: This script currently does not support conversion of data files
+#       that contain multiple event sources.
+
+import os
+import sys
+import subprocess
+from collections import defaultdict
+from subprocess import PIPE
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+    '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from Core import *
+from perf_trace_context import *
+
+try:
+    from subprocess import DEVNULL # py3k
+except ImportError:
+    import os
+    DEVNULL = open(os.devnull, 'wb')
+
+class Cost:
+    def __init__(self):
+        self.cost = 0
+        self.calls = 0
+
+    def add(self, cost):
+        self.cost += cost
+        self.calls += 1
+
+class FileInfo:
+    def __init__(self, file, line):
+        self.file = file
+        self.line = line
+
+class Function:
+    def __init__(self, dsoName, name, sym):
+        self.cost = Cost()
+        self.calls = 0
+        self.dso = dsoName
+        self.name = name
+        self.sym = sym
+        self.fileInfo = FileInfo("???", 0)
+
+        self.callees = defaultdict(lambda: Cost())
+
+class DSO:
+    def __init__(self):
+        self.functions = dict()
+        self.name = ""
+
+    def createFileInfo(self):
+        # try
+        addresses = ""
+        for sym, function in self.functions.items():
+            try:
+                addresses += hex(function.sym['start']) + "\n"
+            except:
+                addresses += "\n"
+        process = subprocess.Popen(["addr2line", "-e", self.name], stdin=PIPE, stdout=PIPE, stderr=DEVNULL, universal_newlines=True)
+        output = process.communicate(input=addresses)[0].split('\n')
+        pos = 0
+        for sym, function in self.functions.items():
+            try:
+                addressInfo = output[pos].split(':')
+                file = addressInfo[0]
+            except:
+                file = None
+            if not function.sym or not file or file == "??":
+                file = "???"
+            try:
+                line = int(addressInfo[1])
+            except:
+                line = 0
+            function.fileInfo = FileInfo(file, line)
+            pos = pos + 1
+
+# a map of all encountered dso's and the functions therein
+# this is done to prevent name clashes
+dsos = defaultdict(lambda: DSO())
+
+def addFunction(dsoName, name, sym):
+    global dsos
+    dso = dsos[dsoName]
+    if not dso.name:
+        dso.name = dsoName
+    function = dso.functions.get(name, None)
+    # create function if it's not yet known
+    if not function:
+        function = Function(dsoName, name, sym)
+        dso.functions[name] = function
+    return function
+
+eventsType = "events: Samples"
+
+# write the callgrind file format to stdout
+def trace_end():
+    global dsos
+
+    print("version: 1")
+    print("creator: perf-callgrind 0.1")
+    print("part: 1")
+    # TODO: get access to command line, it's in the perf data header
+    #       but not accessible to the scripting backend, is it?
+    print(eventsType)
+
+    for dsoName, dso in dsos.items():
+        dso.createFileInfo()
+
+    for dsoName, dso in dsos.items():
+        print("ob=%s" % dsoName)
+        for sym, function in dso.functions.items():
+            print("fl=%s" % function.fileInfo.file)
+            print("fn=%s" % sym)
+            print("%d %d" % (function.fileInfo.line, function.cost.cost))
+            for callee, cost in function.callees.items():
+                print("cob=%s" % callee.dso)
+                print("cfi=%s" % callee.fileInfo.file)
+                print("cfn=%s" % callee.name)
+                print("calls=%d %d" % (cost.calls, callee.fileInfo.line))
+                print("%d %d" % (function.fileInfo.line, cost.cost))
+        print("")
+
+def addSample(event, cost, callchain):
+    caller = None
+    if not callchain:
+        # only add the single symbol where we got the sample, without a backtrace
+        dsoName = event.get("dso", "???")
+        name = event.get("symbol", "???")
+        caller = addFunction(dsoName, name, None)
+    else:
+        # add a function for every frame in the callchain
+        for item in reversed(callchain):
+            dsoName = item.get("dso", "???")
+            name = "???"
+            if "sym" in item:
+                name = item["sym"]["name"]
+            function = addFunction(dsoName, name, item.get("sym", None))
+            # add current frame to parent's callee list
+            if caller is not None:
+                caller.callees[function].add(cost)
+            caller = function
+
+    # increase the self cost of the last frame
+    # all other frames include it now and kcachegrind will automatically
+    # take care of adapting their inclusive cost
+    if caller is not None:
+        caller.cost.add(cost)
+
+def process_event(event):
+    global eventsType
+    caller = addSample(event, 1, event["callchain"])
+
+def trace_unhandled(event_name, context, sample, event):
+    global eventsType
+    cost = 1
+    if sample["period"] > 0:
+        cost = sample["period"]
+    eventsType = "event: ns: time in ns\nevents: ns"
+    caller = addSample(event, cost, event['common_callchain'])
diff --git a/scripts/heatmap.py b/scripts/heatmap.py
new file mode 100755
index 0000000000000000000000000000000000000000..ac05d784930638e22ccd799cf43ed04cb01aa661
--- /dev/null
+++ b/scripts/heatmap.py
@@ -0,0 +1,135 @@
+#! /usr/bin/env python3
+
+import argparse
+import subprocess
+import itertools
+import os
+import re
+import pandas
+import matplotlib.pyplot as plt
+import numpy as np
+
+PARAMETERS = {
+    "REACTIVITY": [10],
+    "DurationTask": [10, 100, 1_000],
+    "N_Iter": [100, 1_000, 10_000],
+}
+CSV_COLS = ["REACTIVITY", "DurationTask", "N_Iter", "Overhead"]
+N_REPEAT = 100
+
+EXEC = "target/release/examples/overhead"
+REGEX = re.compile(r"Preemptive iterator overhead (-?\d+\.\d+)")
+
+CONFIDENCE_INTERVAL = 0.95
+
+
+def percent_up(series: pandas.Series) -> np.float64:
+    # 0.95 + 0.05/2 = 0.975: leave 2.5% in the upper tail
+    return series.quantile(CONFIDENCE_INTERVAL + (1 - CONFIDENCE_INTERVAL) / 2)
+
+
+def percent_low(series: pandas.Series) -> np.float64:
+    # 1 - 0.95 - 0.05/2 = 0.025: leave 2.5% in the lower tail
+    return series.quantile(1 - CONFIDENCE_INTERVAL - (1 - CONFIDENCE_INTERVAL) / 2)
+
+
+STATS = ["min", "max", "median", "mean", "std", percent_low, percent_up]
+GATHER_STATS = {"Overhead": STATS}
+
+
+def collect_heatmap(out_csv: str):
+    if os.path.exists(out_csv):
+        user_input = input("Remove old csv (y/n)? ")
+        if user_input.startswith("y"):
+            os.remove(out_csv)
+        else:
+            print("Keeping existing csv")
+            return
+    with open(out_csv, "w") as f:
+        f.write(",".join(CSV_COLS) + "\n")
+        for reac, dur, niter in itertools.product(*PARAMETERS.values()):
+            for _ in range(N_REPEAT):
+                out = subprocess.run(
+                    [
+                        EXEC,
+                        "--duration-task",
+                        str(dur),
+                        "--reactivity-us",
+                        str(reac),
+                        "--n-iter",
+                        str(niter),
+                    ],
+                    check=True,
+                    text=True,
+                    stdout=subprocess.PIPE,
+                )
+                assert out.stdout
+                capture = re.search(REGEX, out.stdout)
+                assert capture
+                overhead = capture.group(1)
+                f.write(f"{reac},{dur},{niter},{overhead}\n")
+                print(f"{reac},{dur},{niter},{overhead}")
+
+
+def plot_heatmap(path: str):
+    df = pandas.read_csv(path)
+    print(df)
+    df = df.groupby(list(PARAMETERS)).agg(GATHER_STATS).reset_index()
+    print(df)
+    fig, ax = plt.subplots()
+    ax.set_xticks(
+        np.arange(len(PARAMETERS["DurationTask"])),
+        [str(d) for d in PARAMETERS["DurationTask"]],
+    )
+    ax.set_yticks(
+        np.arange(len(PARAMETERS["N_Iter"])),
+        [str(d) for d in PARAMETERS["N_Iter"]],
+    )
+
+    data = np.zeros((len(PARAMETERS["N_Iter"]), len(PARAMETERS["DurationTask"])))
+    for i, niter in enumerate(PARAMETERS["N_Iter"]):
+        for j, dur in enumerate(PARAMETERS["DurationTask"]):
+            condition = (df["N_Iter"] == niter) & (df["DurationTask"] == dur)
+            overhead = df[condition]["Overhead"]
+            print(overhead)
+            lb = np.float64(overhead["percent_low"].iloc[0])
+            ub = np.float64(overhead["percent_up"].iloc[0])
+            mid = (lb + ub) / 2
+            err = ub - mid
+            data[i, j] = mid
+            ax.text(
+                j,
+                i,
+                f"{data[i,j]:.2f}±{err:.2f}%",
+                ha="center",
+                va="center",
+                color="w",
+            )
+
+    ax.imshow(data)
+    fig.tight_layout()
+    ax.set_xlabel("Duration of each iteration in approx e-7 secs.")
+    ax.set_ylabel("Number of iterations.")
+    ax.set_title(
+        f"Overhead in % of the iteration using preemptive iter vs native iter. (reactivity = {PARAMETERS['REACTIVITY'][0]} μs)"
+    )
+    plt.show()
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter
+    )
+    parser.add_argument("--collect-csv", help="File to generate.")
+    parser.add_argument("--plot-csv", help="File to use for plot.")
+    args = parser.parse_args()
+
+    if args.plot_csv:
+        plot_heatmap(args.plot_csv)
+
+    if args.collect_csv:
+        collect_heatmap(args.collect_csv)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/plot_heatmap.py b/scripts/plot_heatmap.py
new file mode 100644
index 0000000000000000000000000000000000000000..135b5b6265e4a88d92c7c97bc7ae76c141f3e033
--- /dev/null
+++ b/scripts/plot_heatmap.py
@@ -0,0 +1,114 @@
+import argparse
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas
+from functools import reduce
+
+
+DURATION_KEY = "DurationTask"
+REACTIVITY_KEY = "Reactivity"
+ITER_KEY = "N_Iter"
+BASELINE_KEY = "BaselineTime"
+OUR_KEY = "IterTime"
+OVERHEAD_KEY = "Overhead"
+MEAN_KEY = "mean"
+
+CONFIDENCE_INTERVAL = 0.95
+
+
+def percent_up(series: pandas.Series) -> np.float64:
+    # 0.95 + 0.05/2 = 0.975: leave 2.5% in the upper tail
+    return series.quantile(CONFIDENCE_INTERVAL + (1 - CONFIDENCE_INTERVAL) / 2)
+
+
+def percent_low(series: pandas.Series) -> np.float64:
+    # 1 - 0.95 - 0.05/2 = 0.025: leave 2.5% in the lower tail
+    return series.quantile(1 - CONFIDENCE_INTERVAL - (1 - CONFIDENCE_INTERVAL) / 2)
+
+
+PARAMETERS = [REACTIVITY_KEY, ITER_KEY, DURATION_KEY]
+REACTIVITY_IDX = PARAMETERS.index(REACTIVITY_KEY)
+ITER_IDX = PARAMETERS.index(ITER_KEY)
+DURATION_IDX = PARAMETERS.index(DURATION_KEY)
+STATS = ["min", "max", "median", "mean", "std", percent_low, percent_up]
+GATHER_STATS = {OUR_KEY: STATS, BASELINE_KEY: STATS, OVERHEAD_KEY: STATS}
+
+
+def show_distribution(reac: int, iter: int, duration: int, df: pandas.DataFrame):
+    rows = df[
+        (df[REACTIVITY_KEY] == reac)
+        & (df[ITER_KEY] == iter)
+        & (df[DURATION_KEY] == duration)
+    ]
+    # baseline = rows[BASELINE_KEY].values
+    # our = rows[OUR_KEY].values
+    # plt.hist(baseline, bins=300, label="baseline", alpha=0.5)
+    # plt.hist(our, bins=300, label="our", alpha=0.5)
+    # plt.legend()
+    plt.hist(rows[OVERHEAD_KEY].values, bins=300)
+    plt.show()
+
+
+def main():
+    parser = argparse.ArgumentParser()
+    parser.add_argument("heatmap_csv_file", help="path to csv file with heatmap data")
+    parser.add_argument(
+        "--show-distrib",
+        action="store_true",
+        help="Show distribution of the worst case.",
+    )
+    args = parser.parse_args()
+    df = pandas.read_csv(args.heatmap_csv_file)
+    print(df)
+    # create column for overhead
+    df[OVERHEAD_KEY] = (df[OUR_KEY] - df[BASELINE_KEY]) / df[BASELINE_KEY] * 100.0
+
+    if args.show_distrib:
+        show_distribution(10, 1000, 10, df)
+
+    # aggregate overhead and baseline stats over duplicate rows sharing the same (reac, iter, dur) tuple
+    df = df.groupby(PARAMETERS).agg(GATHER_STATS).reset_index()
+
+    uniques = [df[key].unique() for key in PARAMETERS]
+    shape = [len(uniq) for uniq in uniques]
+    print(f"Generating a {shape[DURATION_IDX]} x {shape[ITER_IDX]} HeatMap")
+    fig, ax = plt.subplots()
+    ax.set_xticks(
+        np.arange(shape[DURATION_IDX]), [str(d) for d in uniques[DURATION_IDX]]
+    )
+    ax.set_yticks(np.arange(shape[ITER_IDX]), [str(i) for i in uniques[ITER_IDX]])
+
+    data = np.zeros(shape[1:])
+    for i in range(shape[ITER_IDX]):
+        for j in range(shape[DURATION_IDX]):
+            indices = (0, i, j)
+            pairs = (df[k] == u[idx] for k, u, idx in zip(PARAMETERS, uniques, indices))
+            condition = reduce(lambda a, b: a & b, pairs)
+            print(df[condition][OVERHEAD_KEY])
+            overhead = df[condition][OVERHEAD_KEY]
+            lb = np.float64(overhead["percent_low"].iloc[0])
+            ub = np.float64(overhead["percent_up"].iloc[0])
+            mid = (lb + ub) / 2
+            err = ub - mid
+            data[i, j] = mid
+            ax.text(
+                j,
+                i,
+                f"{data[i,j]:.2f}±{err:.2f}%",
+                ha="center",
+                va="center",
+                color="w",
+            )
+
+    ax.imshow(data)
+    fig.tight_layout()
+    ax.set_xlabel("Duration of each iteration in approx e-7 secs.")
+    ax.set_ylabel("Number of iterations.")
+    ax.set_title(
+        f"Overhead in % of the iteration using preemptive iter vs native iter. (reactivity = {uniques[REACTIVITY_IDX][0]} μs)"
+    )
+    plt.show()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/setup_bench.py b/scripts/setup_bench.py
new file mode 100755
index 0000000000000000000000000000000000000000..d64bd052c424866373631c4613ec1c5d215cb06c
--- /dev/null
+++ b/scripts/setup_bench.py
@@ -0,0 +1,22 @@
+#! /bin/python3
+
+import os
+
+
+def main():
+    nb_threads = os.sysconf("SC_NPROCESSORS_CONF")
+    print(f"Setting the performance governor on all {nb_threads} CPUs.")
+    for i in range(nb_threads):
+        path = f"/sys/devices/system/cpu/cpu{i}/cpufreq/scaling_governor"
+        with open(path, "w") as scaling_gov:
+            scaling_gov.write("performance")
+    print("Successfully set the performance governor on all CPUs")
+
+    print("Disabling turbo")
+    with open("/sys/devices/system/cpu/intel_pstate/no_turbo", "w") as no_turbo:
+        no_turbo.write("1")
+    print("Successfully disabled turbo boost")
+
+
+if __name__ == "__main__":
+    main()
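A minimal sketch of how the pieces in this patch are assumed to fit together. The script paths, binary location, and CLI flags are taken from the diff itself; the nightly toolchain (implied by #![feature(core_intrinsics)]), the overhead.csv scratch filename, and the perf record flags are illustrative assumptions, not part of the patch.

    #!/usr/bin/env python3
    # Hypothetical end-to-end driver; paths and flags mirror the diff above
    # unless a comment marks them as an assumption.
    import subprocess

    # Pin all cores to the performance governor and disable turbo boost (needs root).
    subprocess.run(["sudo", "scripts/setup_bench.py"], check=True)

    # Build the binary heatmap.py expects at target/release/examples/overhead.
    # Assumption: a nightly toolchain, since overhead.rs uses core_intrinsics.
    subprocess.run(["cargo", "+nightly", "build", "--release", "--examples"], check=True)

    # Sweep the (reactivity, duration, iterations) grid, N_REPEAT runs per cell.
    subprocess.run(["scripts/heatmap.py", "--collect-csv", "overhead.csv"], check=True)

    # Plot the per-cell midpoint of the 2.5%-97.5% quantile interval.
    subprocess.run(["scripts/heatmap.py", "--plot-csv", "overhead.csv"], check=True)

    # Optional: profile one configuration (flags here are an assumption) and
    # convert the samples for kcachegrind; requires perf with Python scripting.
    subprocess.run(
        ["perf", "record", "--call-graph", "dwarf", "--",
         "target/release/examples/overhead",
         "--duration-task", "100", "--reactivity-us", "10", "--n-iter", "10000"],
        check=True,
    )
    with open("perf.callgrind", "w") as out:
        subprocess.run(["perf", "script", "-s", "scripts/callgrind.py"], stdout=out, check=True)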