diff --git a/.gitignore b/.gitignore index bc88d7251ca2fe421373c81e2cadb4b157fe225c..26a4d913357353c8e45a2af80f0c9929aedde10e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ -**/__binaries__ /openmp **/__pycache__ *.py[co] +__binaries__ diff --git a/model/numa/Makefile b/model/numa/Makefile index f419a735c3c626648f84bd461dba02ea0fc6fdbb..36e9fd5f59f7cd91817aba450159b82c65c6261b 100644 --- a/model/numa/Makefile +++ b/model/numa/Makefile @@ -4,7 +4,7 @@ GCC ?= gcc BIN_NAME := move_page -CFLAGS := -g -O0 +CFLAGS := -g -O0 -Wall -pedantic -Wno-pointer-arith LDFLAGS := -lnuma # force bin dir creation before executing rules diff --git a/model/numa/__init__.py b/model/numa/__init__.py index a647d57081ae80438bf48177962cec0499b26c4f..4cdcaa8fd567166d3bd66fe6c85c7c4b0d172927 100644 --- a/model/numa/__init__.py +++ b/model/numa/__init__.py @@ -48,25 +48,107 @@ class cmd_numa_pagemap(gdb.Command): pagemap_bin = pagemap_path def invoke (self, args, from_tty): + print_raw = False + if "-raw" in args: + print_raw = True + args = args.replace("-raw", "") + try: addr = gdb.parse_and_eval(args) except gdb.error as e: log_user.error("Cannot evaluate '{}': {}".format(args, e)) return - cmd = "{} -n {} {}".format(pagemap_bin, gdb.selected_inferior().pid, addr) + cmd = "{} -n {} {}".format(pagemap_bin, gdb.selected_inferior().pid, hex(my_gdb.addr2num(addr))) res = subprocess.check_output(cmd, shell=True) try: pm_addr, _, node = res[:-1].split(" ") - + if print_raw: + print(node[1:]) + else: log_user.info("Address 0x{} is located on node {}".format(pm_addr, node)) - except ValueError: # couldnt split correctly + except ValueError: # couldn't split correctly + if not res[:-1]: + if print_raw: + print(-2) + else: + log_user.warn("Pagemap chose not to answer ... (page not mapped yet?)") + else: + if print_raw: + print(-1) + else: log_user.warn("Unexpected response from pagemap: '{}'".format(res[:-1])) +class cmd_numa_movepage(gdb.Command): + def __init__ (self): + gdb.Command.__init__ (self, "numa move_page", gdb.COMMAND_NONE) + + def invoke (self, args, from_tty): + addr, length, dest_node = args.split(" ") + + length = int(length) + dest_node = int(dest_node) + + try: + addr = my_gdb.addr2num(gdb.parse_and_eval(addr)) + except gdb.error as e: + log_user.error("Cannot evaluate '{}': {}".format(addr, e)) + return + + cmd = " ".join(map(str, [MOVE_PAGE, gdb.selected_inferior().pid, dest_node, hex(addr), length])) + + try: + log_user.warn("move {} ({}b) to node {}".format(hex(addr), length, dest_node)) + res = subprocess.check_output(cmd, shell=True, stderr=subprocess.STDOUT) + log_user.info(str(res)[:-1]) + except subprocess.CalledProcessError as e: + log_user.error("{} returned error code {}.".format(cmd, e.returncode)) + error = str(e.output)[:-1] + log_user.warn(error) + +class cmd_numa_current_core(gdb.Command): + def __init__ (self): + gdb.Command.__init__ (self, "numa current_core", gdb.COMMAND_NONE) + + def invoke (self, args, from_tty): + gdb.execute("numa current_node {} -core".format(args)) + +class cmd_numa_node_cores(gdb.Command): + def __init__ (self): + gdb.Command.__init__ (self, "numa node_cores", gdb.COMMAND_NONE) + + def invoke (self, args, from_tty): + try: + target_node = int(args) + except Exception as e: + log_user.warn("Expected a node id as parameter ({})".format(e)) + return + + log_user.info("Core on node {}:".format(target_node)) + for cpu, node in cmd_numa_current_node.singleton.cpu_to_node.items(): + if node == target_node: + log_user.info("{}".format(cpu)) + +class 
cmd_numa_core_node(gdb.Command): + def __init__ (self): + gdb.Command.__init__ (self, "numa core_node", gdb.COMMAND_NONE) + + def invoke (self, args, from_tty): + try: + target_core = int(args) + except Exception as e: + log_user.warn("Expected a core id as parameter ({})".format(e)) + return + node = cmd_numa_current_node.singleton.cpu_to_node.get(target_core, "<not found>") + log_user.info("Core {} is on node {}.".format(target_core, node)) + class cmd_numa_current_node(gdb.Command): + singleton = None + def __init__ (self): gdb.Command.__init__ (self, "numa current_node", gdb.COMMAND_NONE) self.cpu_to_node = {} + cmd_numa_current_node.singleton = self with open("/proc/cpuinfo") as cpuinfo_f: last_cpu = None @@ -109,6 +191,12 @@ class cmd_numa_current_node(gdb.Command): else: log_user.warn("Thread #{} is bound to CPUs {}.".format(int(gdb.selected_thread().num), allowed_cpus)) else: + if "-core" in args: + if "-raw" in args: + print(allowed_cpus) + else: + log_user.warn("Thread #{} is bound to node cpu {}.".format(int(gdb.selected_thread().num), allowed_cpus)) + else: bound_to_node = self.cpu_to_node[int(allowed_cpus)] if "-raw" in args: print(bound_to_node) @@ -151,10 +239,10 @@ class cmd_numa(gdb.Command): if args: gdb.execute("numa {}".format(args)) -def size_fmt(num, suffix='B'): +def size_fmt(num, suffix='B'): # from SO, could be improved ... for unit in ['','Ki','Mi','Gi','Ti','Pi','Ei','Zi']: if abs(num) < 1024.0: - return "%3.0f%s%s" % (num, unit, suffix) + return "%.0f%s %s" % (num, unit, suffix) num /= 1024.0 return "%.0f%s%s" % (num, 'Yi', suffix) @@ -166,14 +254,19 @@ class cmd_numa_spread_pages(gdb.Command): if gdb.selected_inferior().pid == 0: log_user.error("No PID for this inferior. Is it running?") return + + node_inc = int(args) if args else 1 with open("/proc/{}/maps".format(gdb.selected_inferior().pid)) as fmap: # search for "00601000-00622000 rw-p 00000000 00:00 0 [heap]" for line in fmap.readlines(): if "heap" in line: break + start, stop = line.split(" ")[0].split("-") start, stop = int(start, 16), int(stop, 16) - size = size_fmt(stop - start) + length = stop - start + size = size_fmt(length) + log_user.info("Process heap goes from {} to {} (={})".format(hex(start), hex(stop), size)) class cmd_numa_pin_thread(gdb.Command): @@ -183,13 +276,240 @@ class cmd_numa_pin_thread(gdb.Command): def invoke (self, args, from_tty): log_user.error("Thread pinning not implemented yet ...") pass - + def on_activated(): cmd_numa_current_node() + cmd_numa_current_core() cmd_numa_pagemap() + cmd_numa_movepage() cmd_numa_current_node_by_call() + cmd_numa_node_cores() + cmd_numa_core_node() + + cmd_numa_move_3d_matrix() + cmd_numa_move_3d_matrix_spread() cmd_numa_spread_pages() cmd_numa_pin_thread() def initialize(): cmd_numa() + + +MOVE_PAGE = "/home/pouget/dema/git/mcgdb/model/numa/__binaries__/move_page" +PAGESIZE = 4096 +class cmd_numa_move_3d_matrix(gdb.Command): + def __init__ (self): + gdb.Command.__init__ (self, "numa move_3D_matrix", gdb.COMMAND_NONE) + + def invoke (self, args, from_tty): + try: + mat_name, x, y, z, elt_size, dest_node = args.split(" ") + mat = gdb.parse_and_eval(mat_name) + x = int(gdb.parse_and_eval(x)) + y = int(gdb.parse_and_eval(y)) + z = int(gdb.parse_and_eval(z)) + elt_size = int(gdb.parse_and_eval(elt_size)) + dest_node = int(dest_node) + except Exception as e: + log_user.error("Expected parameters: matrice_name x_size y_size z_size elt_size dest_node (mat[x][y][z]): {}".format(e)) + return + + z_length = z*elt_size + + current_start = None + 
current_end = None + errors = [] + + def do_move_page(): + assert current_start is not None and current_end is not None + + length = current_end - current_start + cmd = " ".join(map(str, [MOVE_PAGE, gdb.selected_inferior().pid, dest_node, hex(int(current_start)), length])) + nb_pages = int(length / PAGESIZE) + 1 + try: + log_user.warn("{} # {} pages".format(cmd, nb_pages)) + res = subprocess.check_output(cmd, shell=True, stderr=subprocess.STDOUT) + log_user.info(str(res)[:-1]) + except subprocess.CalledProcessError as e: + log.error("{} returned error code {}.".format(cmd, e.returncode)) + errors.append(str(e.output)[:-1]) + log.warn(errors[-1]) + + for row_x in range(x): + log_user.info("Process {}[{}]".format(mat_name, row_x)) + for col_y in range(y): + + start_addr = my_gdb.addr2num(mat[row_x][col_y].address) + + page_start = start_addr & ~(PAGESIZE-1) + page_end = ((page_start + z_length) & ~(PAGESIZE-1)) + PAGESIZE-1 + + #log_user.warn("{}[{}][{}] is at {} @ {}".format(mat_name, row_x, col_y, start_addr, z_length)) + + if current_start is None: + current_start = page_start + current_end = page_end + + if page_start > current_end: + + log_user.warn("Move until {}[{}][{}]".format(mat_name, row_x, col_y)) + + # need to flush + do_move_page() + + current_start = page_start + current_end = page_end + else: + assert page_end >= current_end + current_end = page_end + + log_user.warn("Move until the end {}[{}][{}]".format(mat_name, row_x, col_y)) + do_move_page() + + if errors: + log_user.warn("{} errors happend during the page moves:".format(len(errors))) + for err in errors: + log_user.info(err) + log_user.info("---") +#numa move_3D_matrix r n1-1 n2-1 n3-1 sizeof(double) 15 + + +MOVE_PAGE = "/home/pouget/dema/git/mcgdb/model/numa/__binaries__/move_page" +PAGESIZE = 4096 + +PAGES_MOVED = 0 +class cmd_numa_move_3d_matrix_spread(gdb.Command): + def __init__ (self): + gdb.Command.__init__ (self, "numa move_3D_matrix_spread", gdb.COMMAND_NONE) + + def invoke (self, args, from_tty): + try: + mat_name, x, y, z, elt_size = args.split(" ") + mat = gdb.parse_and_eval(mat_name) + x = int(gdb.parse_and_eval(x)) + y = int(gdb.parse_and_eval(y)) + z = int(gdb.parse_and_eval(z)) + elt_size = int(gdb.parse_and_eval(elt_size)) + except Exception as e: + log_user.error("Expected parameters: matrice_name x_size y_size z_size elt_size # (mat[x][y][z]): {}".format(e)) + import pdb;pdb.set_trace() + + return + + z_length = z*elt_size + + current_start = None + current_end = None + current_start_idx = [0, 0] + + errors = [] + + def do_move_page(dest_node): + assert current_start is not None and current_end is not None + + length = current_end - current_start + cmd = " ".join(map(str, [MOVE_PAGE, gdb.selected_inferior().pid, dest_node, hex(int(current_start)), length])) + nb_pages = int(length / PAGESIZE) + 1 + global PAGES_MOVED + PAGES_MOVED += nb_pages + try: + res = subprocess.check_output(cmd, shell=True, stderr=subprocess.STDOUT) + log_user.warn(str(res)[:-1]) + except subprocess.CalledProcessError as e: + log.error("{} returned error code {}.".format(cmd, e.returncode)) + errors.append(str(e.output)[:-1]) + log.warn(errors[-1]) + + cpu_to_node = cmd_numa_current_node.singleton.cpu_to_node + NB_NODE = max(cpu_to_node.values()) + 1 + CORE_PER_NODE = len([i for i in cpu_to_node.values() if i == 0]) + NB_CORE = NB_NODE * CORE_PER_NODE + + import math # x = 514 + chunk_size = math.ceil(float(x) / NB_CORE) # 3 + SECOND_HALF = x % NB_CORE # 130 + current_core = 0 + current_core_load = 0 + + prev_node = None + for 
row_x in range(x): + dest_node = cpu_to_node[current_core] + + if prev_node is None: + prev_node = dest_node + + current_core_load += 1 + if current_core_load >= chunk_size: + current_core += 1 + current_core %= NB_CORE + current_core_load = 0 + + if current_core == SECOND_HALF: + chunk_size = math.ceil((x - row_x) / (NB_CORE - current_core)) + + if dest_node != prev_node and prev_node is not None: + log_user.info("") + log_user.info("Node break") + log_user.info("Move {}[{}][{}] -- {}[{}][{}] to {}".format(mat_name, current_start_idx[0], current_start_idx[1], + mat_name, row_x-1, y-1, dest_node)) + log_user.info("") + + do_move_page(prev_node) + prev_node = dest_node + + current_start = None + + #log_user.info("Process {}[{}] to {}".format(mat_name, row_x, dest_node)) + + for col_y in range(y): + start_addr = my_gdb.addr2num(mat[row_x][col_y][0].address) + + page_start = start_addr & ~(PAGESIZE-1) + page_end = ((page_start + z_length) & ~(PAGESIZE-1)) + PAGESIZE-1 + + if current_start is None: + current_start_idx = row_x, col_y + + current_start = page_start + current_end = page_end + + if False and page_start > current_end+1: + target_x = row_x + target_y = col_y + if target_y == 0: + target_x -=1 + target_y = y - 1 + else: + target_y -= 1 + + log_user.info("Move {}[{}][{}] -- {}[{}][{}] to {} (page break)".format(mat_name, current_start_idx[0], current_start_idx[1], + mat_name, target_x, target_y, dest_node)) + + # need to flush + do_move_page(dest_node) + + current_start_idx = row_x, col_y + current_start = page_start + current_end = page_end + else: + assert page_end >= current_end + current_end = page_end + + log_user.info("Move {}[{}][{}] -- {}[{}][{}] to {} (end-of-matrix)".format(mat_name, current_start_idx[0], current_start_idx[1], + mat_name, row_x, col_y, dest_node)) + do_move_page(dest_node) + + if errors: + log_user.warn("{} errors happend during the page moves:".format(len(errors))) + for err in errors: + log_user.info(err) + log_user.info("---") + import pdb;pdb.set_trace() + pass + +class cmd_numa_pages_moved(gdb.Command): + def __init__ (self): + gdb.Command.__init__ (self, "numa pages_moved", gdb.COMMAND_NONE) + + def invoke (self, args, from_tty): + log_user.info("{} page moved, or {}".format(PAGES_MOVED, size_fmt(PAGES_MOVED*PAGESIZE))) diff --git a/model/numa/move_page.c b/model/numa/move_page.c index 1d24a3366259f82211fe7c97d9cbe731d038e49b..6e24926b048cefc3a3b423c82eb2a8ea153a6232 100644 --- a/model/numa/move_page.c +++ b/model/numa/move_page.c @@ -2,6 +2,7 @@ #include <stdio.h> #include <unistd.h> #include <numaif.h> +#include <string.h> void usage(void) { @@ -11,42 +12,114 @@ void usage(void) int main(int argc, char *argv[]) { - if (argc < 4 || argv > 5) { + if (argc < 4 || argc > 5) { usage(); } + int pid = atoi(argv[1]); - int to_node = atoi(argv[2]); + int to_node_start, to_node_end, node_inc; + char *to_node_arg = argv[2]; + + char *is_node_range = strchr(to_node_arg, '-'); + if (is_node_range == NULL) { + // dest node is a node id + to_node_start = atoi(to_node_arg); + to_node_end = to_node_start; + node_inc = 0; + } else { + // dest node is a range of node ids + is_node_range[0] = '\0'; // end first half here + is_node_range++; // second half starts after the - + + to_node_start = atoi(to_node_arg); + + char *has_node_inc = strchr(is_node_range, '@'); + if (has_node_inc != NULL) { + has_node_inc[0] = '\0'; + has_node_inc++; + + node_inc = atoi(has_node_inc); + } else { + node_inc = 1; + } + + to_node_end = atoi(is_node_range); + } + void *addr = (void *) 
strtol(argv[3], NULL, 0); - int len = argc > 4 ? atoi(argv[4]) : 1; // default len: only one page + unsigned long len = argc > 4 ? strtol(argv[4], NULL, 0) : 1; // default len: only one page void *addr_end = addr+len; - - long int PAGESIZE = (long int) getpagesize(); + int ret; + + long int PAGESIZE = sysconf(_SC_PAGESIZE) /*(long int) getpagesize()*/; void *page = (void *)((long int)addr & ~(PAGESIZE-1)); unsigned long nb_pages = (addr_end - page) / PAGESIZE + 1; - printf("PID %d: move %p-%p to node %d (%d pages)\n", pid, addr, addr_end, to_node, nb_pages); - + printf("PID %d: move %p .. %p to nodes [%d:%d:%d] (%lu pages)\n", pid, addr, addr_end, to_node_start, to_node_end, node_inc, nb_pages); + void **pages = malloc(sizeof(*pages) * nb_pages); int *nodes = malloc(sizeof(*pages) * nb_pages); int *status = malloc(sizeof(*status) * nb_pages); - int i = 0; + unsigned long i = 0; + int current_node = to_node_start; + while (page < addr_end) { if (i >= nb_pages) { fprintf(stderr, "error in page splitting ...\n"); exit(2); } - - printf("move page %p\n",page); - + pages[i] = page; - nodes[i] = to_node; + nodes[i] = current_node; + + if (to_node_start != to_node_end) { + current_node += node_inc; + if (current_node > to_node_end) { + current_node = to_node_start; + } + } page += PAGESIZE; i += 1; - - + } + + ret = move_pages(pid, nb_pages, pages, nodes, status, MPOL_MF_MOVE); + + if (ret) { + perror("move_pages error"); + } + + int *on_nodes = malloc(sizeof(int) * nb_pages); + + ret = move_pages(pid, nb_pages, pages, NULL, on_nodes, 0); + if (ret) { + perror("move_pages check error"); } - return move_pages(pid, nb_pages, pages, nodes, status, MPOL_MF_MOVE); + unsigned long err = 0; + unsigned long unmapped = 0; +#define UNMAPPED_PAGE -2 + for (size_t i = 0; i < nb_pages - 1; i++) { + if (on_nodes[i] != nodes[i] && on_nodes[i] != UNMAPPED_PAGE) { + fprintf(stderr, "Page %zu (%p) should be on node %d, but is on node %d\n", i, pages[i], nodes[i], on_nodes[i]); + if (!ret) { + ret = -1; + } + err++; + } else if (on_nodes[i] == UNMAPPED_PAGE) { + unmapped++; + } + } + + if (err > 0) { + fprintf(stderr, "move_page failed to move %lu over %lu pages...\n", err, nb_pages); + } + + if (unmapped > 0) { + fprintf(stderr, "move_page failed to move %lu over %lu pages because they were unmapped...\n", unmapped, nb_pages); + } + + return -ret; } + diff --git a/model/profiling/info/__init__.py b/model/profiling/info/__init__.py index c0b0bfe6d40bbb28ae68ab9f9f53c074b6080002..041463bc3c992cc54e492878901f232772e8a315 100644 --- a/model/profiling/info/__init__.py +++ b/model/profiling/info/__init__.py @@ -6,7 +6,7 @@ log_user = logging.getLogger("mcgdb.log.user") from mcgdb.toolbox import my_gdb # will be added to global scope -info_counters = "perf_standalone", "omp", "numa" #"proc", "hit", "counter", "perf", "gprof", "memleaks" +info_counters = "perf_standalone", "omp", "numa", "proc" #"counter_id" #"proc", "hit", "counter", "perf", "gprof", "memleaks" info_counter_classes = None # will be populated in initialize() disabled_counters = set() @@ -38,7 +38,12 @@ def new_infoset(): counters = [] for info_counter_class in info_counter_classes: try: - counters.append(info_counter_class()) + if info_counter_class.__name__ == "numa_location_info": + pass + #counters.append(info_counter_class("&r[$omp_loop_start()][0][0]")) + #counters.append(info_counter_class("&u[$omp_loop_start()][0][0]")) + else: + counters.append(info_counter_class()) except RuntimeError as e: log.info("Couldn't instantiate counter class 
'{}'".format(info_counter_class.__name__)) log.info(e) diff --git a/model/profiling/info/counter_id.py b/model/profiling/info/counter_id.py new file mode 100644 index 0000000000000000000000000000000000000000..2666687e0730cc96b8a642e40c09cb721eac7570 --- /dev/null +++ b/model/profiling/info/counter_id.py @@ -0,0 +1,32 @@ +from collections import OrderedDict, defaultdict + +import logging; log = logging.getLogger(__name__) +log_user = logging.getLogger("mcgdb.log.user") + +import gdb + +cpt = 1 + +class counter_id_info(): + "counter id" + name = "counter id" + + def __init__(self): + self.__results = {"cid": None} + + def start(self): + global cpt + self.__results["cid"] = cpt + cpt += 1 + + def stop(self, paused=False): + pass + + def to_log(self, ongoing=False): + return self.__results + + @property + def results(self): + return self.__results + +__COUNTERS__ = [counter_id_info] diff --git a/model/profiling/info/numa.py b/model/profiling/info/numa.py index 4dd432f044bf4b8c98b92463704fee1b062539ad..9052669df3e4ff7e4138e5983a367a0f1a5c8f31 100644 --- a/model/profiling/info/numa.py +++ b/model/profiling/info/numa.py @@ -14,12 +14,25 @@ class numa_node_info(): gdb.execute("numa", to_string=True) # init, just in case def start(self): + good = True try: node = gdb.execute("numa current_node -raw", to_string=True) self.__results["numa node"] = int(node) + except Exception as e: self.__results["numa node"] = str(e) + good = False + try: + core = gdb.execute("numa current_core -raw", to_string=True) + self.__results["numa core"] = int(core) + except Exception as e: + self.__results["numa core"] = str(e) + good = False + + if good: + self.__results["numa node core"] = int(node) % 8 + def stop(self, paused=False): pass @@ -29,5 +42,25 @@ class numa_node_info(): @property def results(self): return self.__results + +class numa_location_info(): + def __init__(self, var): + self.var = var + self.name = "{} location".format(var) + self.__results = {self.name: None} + + gdb.execute("numa", to_string=True) # init, just in case + + + def start(self): + loc = gdb.execute("numa pagemap -raw {}".format(self.var), to_string=True) + self.__results[self.name] = int(loc[:-1]) + + def stop(self, paused=False): + pass -__COUNTERS__ = [numa_node_info] + def to_log(self, ongoing=False): + return self.__results + + +__COUNTERS__ = [numa_node_info, numa_location_info] diff --git a/model/profiling/info/omp.py b/model/profiling/info/omp.py index 2377e87719cb58d6b792166dbaa6ab7e70542b88..aa71ce70992c78c8575f8233f398e64175ce5706 100644 --- a/model/profiling/info/omp.py +++ b/model/profiling/info/omp.py @@ -13,15 +13,15 @@ class omp_loop_len_info(): name = "omp loop len counter" def __init__(self): - self.__results = {"omp_loop_bounds": loop, - "omp_loop_len": 0} + self.__results = {"omp_loop_start": None, + "omp_loop_len": None} def start(self): - loop[:] = 0, 0 + pass def stop(self, paused=False): - self.__results["omp_loop_len"] = loop[1] - loop[0] - self.__results["omp_loop_bounds"] = str(loop).replace("]", "[") + self.__results["omp_loop_start"] = loop[1] + self.__results["omp_loop_len"] = loop[1] - loop[0] + 1 def to_log(self, ongoing=False): return self.__results diff --git a/model/profiling/info/perf_standalone.py b/model/profiling/info/perf_standalone.py index dba3ef9baf846e8ad95fe660dfd67a22346836b0..a541368888ef547b0edebaff765286597dc7332f 100644 --- a/model/profiling/info/perf_standalone.py +++ b/model/profiling/info/perf_standalone.py @@ -89,7 +89,7 @@ class perf_info_standalone(): self.results[k] += v except 
TypeError: log.error("Cannot add '{}' to '{}'.".format(v, self.results[k])) - import pdb;pdb.set_trace() + self.results[k] = "{}/{}".format(self.results[k], v) def reset(self): @@ -146,9 +146,13 @@ class perf_info_standalone(): pass except Exception as e: - import pdb;pdb.set_trace() - desc = line - val = str(e) #, "" + if "Error" in line or 'Fatal' in line: + desc = "PERF ERROR" + val = line + else: + import pdb;pdb.set_trace() + desc = line + val = str(e) #, "" results[desc] = val diff --git a/model/profiling/info/proc.py b/model/profiling/info/proc.py index eea372b289be0174de9718537c19602bf95d2866..ac07b3a46262474dea70b749fae04da60190d020 100644 --- a/model/profiling/info/proc.py +++ b/model/profiling/info/proc.py @@ -108,7 +108,7 @@ class proc_stat_info(proc_info): def __init__(self): proc_info.__init__(self) self.filename = "/proc/{}/stat" - self.results = {"utime":0, "stime":0, "maj_flt":0} + self.results = {"utime":0, "stime":0, "maj_flt":0, "cmaj_flt":0,"min_flt":0,"cmin_flt":0,} def parse(self): KEYS = ["pid", "tcomm", "state", "ppid", "pgid", "sid", @@ -128,4 +128,4 @@ class proc_stat_info(proc_info): return status -__COUNTERS__ = [proc_stat_info, proc_io_info, proc_status_info] +__COUNTERS__ = [proc_stat_info] #, proc_io_info, proc_status_info] diff --git a/model/profiling/interaction/counters.py b/model/profiling/interaction/counters.py index 78c8b6fd5e9c6d301de90414c175935ca540eb78..9fab11fe2ad6250b8f56c5c567c0abac8f4b533f 100644 --- a/model/profiling/interaction/counters.py +++ b/model/profiling/interaction/counters.py @@ -1,7 +1,47 @@ +import subprocess + import gdb +import logging; log = logging.getLogger(__name__) +log_user = logging.getLogger("mcgdb.log.user") + DEFAULT = "instructions,cycles,task-clock" + +def perf_list(): + command = ['perf', 'list'] + perf = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout, stderr = perf.communicate() + perf.wait() + + assert perf.returncode is not None + + try: + stdout = stdout.decode("ascii") + except Exception: + pass + + return stdout + +def test_perf_counters(counters): + command = ['perf', 'stat', '-x,', + '-e', counters, + 'sleep', '0.001'] + + perf = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdout, stderr = perf.communicate() + perf.wait() + assert perf.returncode is not None + + try: + stderr = stderr.decode("ascii") + except Exception: + pass + + return perf.returncode == 0, stderr + class param_perf_counters(gdb.Parameter): + """See `perf list` for more details about the expected values.""" + __self = None @classmethod @@ -11,8 +51,15 @@ class param_perf_counters(gdb.Parameter): except ValueError: # not initialized return DEFAULT - + def __init__ (self): + self.show_doc = \ + self.set_doc = """ +Set of counters used for profiling with `perf stat -e ...` : + +{} + """.format(perf_list()) + gdb.Parameter.__init__(self, "profile-perf-counters", gdb.COMMAND_OBSCURE, gdb.PARAM_STRING) @@ -20,8 +67,21 @@ class param_perf_counters(gdb.Parameter): assert param_perf_counters.__self is None param_perf_counters.__self = self + self.prev = DEFAULT + def get_set_string(self): - return "perf profiling counters set to '{}'".format(self.value) + ok, stderr = test_perf_counters(self.value) + if ok: + self.prev = self.value + return "perf profiling counters set to '{}'".format(self.value) + else: + bad_val = self.value + self.value = self.prev + + log_user.error("Could not set perf profiling counters to '{}'.".format(bad_val)) + log_user.warning(stderr) + + return "perf 
profiling counters unchanged ('{}')".format(self.prev) def get_show_string (self, svalue): return "perf profiling counters are '{}'".format(svalue) diff --git a/model/profiling/interaction/graph.py b/model/profiling/interaction/graph.py index 3d496d1e5b5b42cfe7c52740b49887e0f7b8529f..b21fc54defb4b1f8829861b4a11f872c04a06505 100644 --- a/model/profiling/interaction/graph.py +++ b/model/profiling/interaction/graph.py @@ -7,11 +7,6 @@ import gdb profile = None # resolved in initialize() -def comparison(record, name): - print("comparison: {}".format(name)) - print(record) - return - def sequential(record, name): join = " ; " if record and isinstance(record[0], str) else " " @@ -19,31 +14,30 @@ def sequential(record, name): print('{} | {}'.format(name, values)) -def stack(record, name): - print("soustractive: {}".format(name)) - print(record["soustractive"]) - return class cmd_profile_graph_plot (gdb.Command): def __init__ (self): gdb.Command.__init__(self, "profile graph plot", gdb.COMMAND_OBSCURE) @staticmethod - def details_to_graph(prof, profile_info, key, - ongoing=False, - firstlast=False, per_fct=False, soustractive=False): + def details_to_graph(prof, profile_info, key, ongoing=False): def print_a_set(info_set): if key is None: keys = set() for info in info_set: results = info.to_log(ongoing) + + if not results: + continue + if key is None: keys.update(results.keys()) continue - if not results or key not in results: continue - + if key not in results: + continue + if isinstance(results[key], str): return results[key] try: @@ -59,18 +53,12 @@ class cmd_profile_graph_plot (gdb.Command): def print_sets(info_sets): return [print_a_set(info_set) for info_set in info_sets] - if prof.do_first_last and firstlast: - return print_a_set(profile_info.firstlast) - - if prof.do_per_function and per_fct: - return print_sets(profile_info.per_fct) - - if prof.do_soustractive and soustractive: - return print_sets(profile_info.soustractive) - - log.info("Option asked incompatible with profile configuration") + return print_a_set(profile_info.firstlast) + + def invoke (self, args, from_tty): + no_run_warn = " -no-run-warn" in args + args = args.replace(" -no-run-warn", "") - def invoke (self, args, from_tty, show_data=False, show_keys=False): prof_id, _, key = args.partition(" ") prof_type = "first last" @@ -79,26 +67,9 @@ class cmd_profile_graph_plot (gdb.Command): elif prof_id == "all": pass else: - types = {"first":"first last", - "soustractive":"soustractive", - "function":'per_fct'} - - try: - prof_type = types[prof_id] - - prof_id, _, key = key.partition(" ") - - if prof_id != "all": - prof_id = int(prof_id) - - if not key: - raise Exception("no key ...") - - except Exception: - log_user.error("Expected [first|soustractive|function] <profile id|all> <key> arguments.") - return + log_user.error("Expected <profile id|all> <key> arguments.") + return - assert show_keys or key found = False to_all_graph = [] @@ -109,32 +80,17 @@ class cmd_profile_graph_plot (gdb.Command): found = True if prof.running: - log_user.warn("Skipping {} #{}, it's running".format(prof, prof.numbers[profile.Profile])) + if not no_run_warn: + log_user.warn("Skipping {} #{}, it's running".format(prof, prof.numbers[profile.Profile])) continue thread_key = prof.thread_key - to_graph = defaultdict(list) - - if show_keys: - per_thread = prof.all_per_thead_infos[0][thread_key] - log_user.info("Profile id={}".format(prof_id)) - log_user.warn(", ".join(self.details_to_graph(prof, per_thread, None, firstlast=True))) - continue + to_graph = 
defaultdict(list) for per_thread_dict in prof.all_per_thead_infos: per_thread = per_thread_dict[thread_key] - if prof_type == "first last": - to_graph["first last"].append(self.details_to_graph(prof, per_thread, key, firstlast=True)) - if prof_type == "per_fct": - to_graph["per_fct"].append(self.details_to_graph(prof, per_thread, key, per_fct=True)) - if prof_type == "soustractive": - to_graph["soustractive"].append(self.details_to_graph(prof, per_thread, key, soustractive=True)) - - if show_data: - log_user.info("Profile id={}, key={}, {} values".format(prof_id, key, len(to_graph[prof_type]))) - log_user.warn(to_graph[prof_type]) - continue + to_graph["first last"].append(self.details_to_graph(prof, per_thread, key)) if prof_id == "all": if to_graph[prof_type][0] is None: @@ -146,35 +102,70 @@ class cmd_profile_graph_plot (gdb.Command): if not to_graph: log.warn("Nothing to plot ...") - elif len(prof.all_per_thead_infos) == 1: - stack(to_graph, "{} (stack)".format(key)) - comparison(to_graph, "{}".format(key)) - else: - sequential(to_graph[prof_type], "{} (sequential)".format(prof_type)) break if prof_id == "all": sequential(to_all_graph, key) - pass if not found: log_user.error("No profile with id={}".format(prof_id)) -class cmd_profile_graph_show_data (cmd_profile_graph_plot): - def __init__ (self, plot): - gdb.Command.__init__(self, "profile graph show-data", gdb.COMMAND_OBSCURE) - self.plot = plot +class cmd_profile_graph_plot_all (cmd_profile_graph_plot): + def __init__ (self): + gdb.Command.__init__(self, "profile graph plot-all", gdb.COMMAND_OBSCURE) def invoke (self, args, from_tty): - self.plot.invoke(args, from_tty, show_data=True) + prof_id, _, key = args.partition(" ") -class cmd_profile_graph_show_keys (cmd_profile_graph_plot): - def __init__ (self, plot): - gdb.Command.__init__(self, "profile graph show-keys", gdb.COMMAND_OBSCURE) - self.plot = plot + if prof_id.isdigit(): + prof_id = int(prof_id) + elif prof_id == "all": + pass + + keys = gdb.execute("profile graph keys {} -raw".format(args), to_string=True) + for key in sorted(keys[:-1].split(",")): + gdb.execute("profile graph plot {} {} -no-run-warn".format(args, key)) + +class cmd_profile_graph_keys(gdb.Command): + def __init__ (self): + gdb.Command.__init__(self, "profile graph keys", gdb.COMMAND_OBSCURE) def invoke (self, args, from_tty): - self.plot.invoke(args, from_tty, show_keys=True) + prof_id, _, key = args.partition(" ") + + if prof_id.isdigit(): + prof_id = int(prof_id) + elif prof_id == "all": + pass + elif not prof_id: + prof_id = "all" + else: + log.error("Expected profile id or 'all' in parameters.") + return + + raw = "-raw" in args + if raw or prof_id == "all": + key_set = set() + + for prof in profile.Profile.profiles: + if prof_id not in ("all", prof.numbers[profile.Profile]): + continue + + thread_key = prof.thread_key + + per_thread = prof.all_per_thead_infos[0][thread_key] + + keys = cmd_profile_graph_plot.details_to_graph(prof, per_thread, None) + if not raw and not prof_id == "all": + log_user.info("Profile id={}".format(prof.numbers[profile.Profile])) + log_user.warn(", ".join(keys)) + else: + map(key_set.add, keys) + if raw: + print(",".join(key_set)) + elif prof_id == "all": + log_user.info("Keys for all the profiles:") + log_user.warn(", ".join(key_set)) class cmd_profile_graph (gdb.Command): def __init__ (self): @@ -197,8 +188,9 @@ class cmd_profile_graph (gdb.Command): def on_load(): plot = cmd_profile_graph_plot() - cmd_profile_graph_show_data(plot) - cmd_profile_graph_show_keys(plot) + + 
cmd_profile_graph_plot_all() + cmd_profile_graph_keys() def initialize(): global profile diff --git a/model/profiling/interaction/graph_offline.py b/model/profiling/interaction/graph_offline.py index 562b6815b7d7c450712a3ae01bed8077b3fad608..aa9aec1c63ad1492fd85af86e34a30c45a469073 100644 --- a/model/profiling/interaction/graph_offline.py +++ b/model/profiling/interaction/graph_offline.py @@ -108,6 +108,7 @@ End-of-line modifiers: do_sort = values values = sorted(do_sort) name += " (sorted)" + elif do_sort: values = [y for (x,y) in sorted(zip(do_sort, values))] @@ -119,7 +120,7 @@ End-of-line modifiers: elif do_pick: value_to_pick, pick_values, pick_name = do_pick - + name = "{} (picked from {} = {})".format(name, pick_name, value_to_pick) values = [v for p, v in zip(pick_values, values) if p == value_to_pick] @@ -165,4 +166,3 @@ End-of-line modifiers: chart.title = title log_user.info("Rendering chart plot of {} into {}".format(title, target)) chart.render_to_png(target) - shutil.copy(target, "".join(DEFAULT)) diff --git a/model/profiling/interaction/manual.py b/model/profiling/interaction/manual.py index 01c87d843b1ba71fd84ac33f9f5ded458308beb6..f25750620a7462ad616cd12172c60e73e0d61a67 100644 --- a/model/profiling/interaction/manual.py +++ b/model/profiling/interaction/manual.py @@ -26,11 +26,12 @@ class cmd_profile_manual_start (gdb.Command): log.error("Manual profiling already ongoing ...") return - log_user.info("start manual profiling") + log_user.info("start manual profiling ...") prof = cmd_profile_manual_start.profile = profile.ManualProfile() gdb.execute("profile configure {} first-last only".format(prof.numbers[profile.Profile])) prof.start() + log_user.info("manual profiling started.") class cmd_profile_manual_stop (gdb.Command): def __init__ (self): diff --git a/model/profiling/interaction/summary.py b/model/profiling/interaction/summary.py index c9deb3ff44f8f7d11b80e42272e9f2770ad49f1c..f7eb0348e532ec103e178495100bf26e8a4dd9b0 100644 --- a/model/profiling/interaction/summary.py +++ b/model/profiling/interaction/summary.py @@ -81,7 +81,7 @@ class cmd_profile_info (gdb.Command): continue log_user.warn("#{} {}".format(prof.numbers[profile.Profile], prof)) - log_user.info("\t{} hits".format(len(prof.all_per_thead_infos))) + #log_user.info("\t{} hits".format(len(prof.all_per_thead_infos))) if do_config: log_user.info("") diff --git a/model/profiling/profile.py b/model/profiling/profile.py index 2bd95c15de98d7d37e0b9b66fbabf4977e298847..1015ee67bb537bf8266501793b8714dc5a629d0b 100644 --- a/model/profiling/profile.py +++ b/model/profiling/profile.py @@ -143,13 +143,13 @@ class Profile(): infoset.stop() - def need_to_stop(): - + def need_to_stop(): for cpt, cmpt, val in Profile.breakpoints: for res in infoset.get(cpt): if any([cmpt == "==" and res == val, cmpt == ">" and res > val, cmpt == "<" and res < val]): return cpt, cmpt, val + stop = need_to_stop() if stop: log_user.warn("Profiling breakpoint: Found {}".format(" ".join(map(str,stop)))) diff --git a/model/task/environment/openmp/capture/iomp/kmpc_for_static.py b/model/task/environment/openmp/capture/iomp/kmpc_for_static.py index b799239d07b110e9d863467afd69850ebf00b8ea..ab580e806983b6ce538645013664ec39fea54424 100644 --- a/model/task/environment/openmp/capture/iomp/kmpc_for_static.py +++ b/model/task/environment/openmp/capture/iomp/kmpc_for_static.py @@ -64,20 +64,23 @@ class kmpc_for_static_fini_Breakpoint(OmpFunctionBreakpoint): stop_after = loop.break_after_next loop.break_after_next = False - data["old-scheduler-locking"] = 
gdb.parameter("scheduler-locking") if stop_after: + data["old-scheduler-locking"] = gdb.parameter("scheduler-locking") gdb.execute("set scheduler-locking on") return False, stop_after, data def prepare_after(self, data): gdb.execute("set scheduler-locking {}".format(data["old-scheduler-locking"])) - log.warn("Stopped in worker {} after loop {} iteration {}-{}.".format( + log.warn("Stopped in {} after loop {} iteration {}-{}.".format( data["worker"], data["loop"], data["iteration"][0], data["iteration"][1])) + if data["loop"].done: + log.info("Loop {} is completed.".format(data["loop"])) + return True diff --git a/model/task/environment/openmp/interaction/loop.py b/model/task/environment/openmp/interaction/loop.py index eb8f1f9bf18e788d4caad86f4496273690a7bc51..aa1f611c03db3a0b93183cbc25ab5588231e5136 100644 --- a/model/task/environment/openmp/interaction/loop.py +++ b/model/task/environment/openmp/interaction/loop.py @@ -21,7 +21,7 @@ def activate(): cmd_omp_loop_last_range() cmd_omp_loop_break() cmd_omp_loop_break_after_next() - + fct_omp_loop_start() aspect.register("loop-profile", loop_aspects) class cmd_omp_loop (gdb.Command): @@ -103,6 +103,7 @@ def loop_aspects(Tracks): gdb.execute("profile manual start") def stop_work_of(this): + log.warn("stop work of {}".format(current_worker())) if iteration_profiling_enabled: gdb.execute("profile manual stop") gdb.execute("set scheduler-locking off") @@ -111,5 +112,24 @@ def loop_aspects(Tracks): global iteration_profiling_enabled if iteration_profiling_enabled: + # set loop.break_after_next = True + mcgdb.interaction.push_stop_request("Iteration profiling finished.") iteration_profiling_enabled = False + + +class fct_omp_loop_start(gdb.Function): + def __init__ (self): + gdb.Function.__init__(self, "omp_loop_start") + + def invoke (self): + worker = current_worker() + loop, iteration = representation.ForLoopJob.get_last_loop_of(worker) + + if not loop: return None + + lower, upper, done = iteration + + return lower + + diff --git a/model/task/environment/openmp/representation.py b/model/task/environment/openmp/representation.py index 27fd45f31baf97a49a1020ec3d4521f498d43826..4a99584a205d6d5d7a8a1fa934ea0d1601bd2e4c 100644 --- a/model/task/environment/openmp/representation.py +++ b/model/task/environment/openmp/representation.py @@ -1074,6 +1074,7 @@ class ForLoopJob(Job, aspect.Tracker): self.lower = lower self.upper = upper self.incr = incr + self.is_done = False self.working = {} @@ -1103,7 +1104,7 @@ class ForLoopJob(Job, aspect.Tracker): def done(self): print("LOOP {} done".format(self)) - pass + self.is_done = True class Init: break_after_init = False
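--
Implementation notes (sketches only; nothing below is part of the change set):

Most of the new "numa" subcommands share one pattern: a gdb.Command whose invoke() logs a human-readable message by default and, when -raw is passed, prints only the bare value so the profiling counters can parse it (numa current_node -raw, numa current_core -raw, numa pagemap -raw ...). The stand-alone sketch below shows that pattern; it only runs inside a GDB session, and the "numa last_cpu" subcommand and its /proc/<pid>/task/<tid>/stat lookup are illustrative additions, not commands from this patch.

    # Stand-alone sketch of the "-raw" command pattern used by the patch.
    # Only meaningful inside GDB (the gdb module is provided by GDB itself).
    import gdb

    class cmd_numa_prefix(gdb.Command):
        """Prefix under which the subcommands are registered."""
        def __init__(self):
            gdb.Command.__init__(self, "numa", gdb.COMMAND_NONE, gdb.COMPLETE_NONE, True)

    class cmd_numa_last_cpu(gdb.Command):
        """numa last_cpu [-raw]: CPU the selected thread last ran on."""
        def __init__(self):
            gdb.Command.__init__(self, "numa last_cpu", gdb.COMMAND_NONE)

        def invoke(self, args, from_tty):
            pid = gdb.selected_inferior().pid
            lwp = gdb.selected_thread().ptid[1]      # kernel tid of the selected thread
            with open("/proc/{}/task/{}/stat".format(pid, lwp)) as f:
                stat = f.read()
            # Fields after the comm's closing ')' have fixed positions;
            # 'processor' is the 39th field of the line, i.e. index 36 here.
            cpu = int(stat.rsplit(")", 1)[1].split()[36])
            if "-raw" in args:
                print(cpu)                           # bare value, parseable by counters
            else:
                print("Thread #{} last ran on CPU {}.".format(
                    gdb.selected_thread().num, cpu))

    cmd_numa_prefix()
    cmd_numa_last_cpu()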
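"numa move_3D_matrix" walks mat[x][y], rounds each row's z*elt_size span out to page boundaries, and only shells out to the move_page helper when the next span no longer touches the range accumulated so far, which keeps the number of subprocess calls (and move_pages() batches) small. The coalescing itself does not depend on GDB; a minimal sketch with invented addresses:

    # Sketch of the page-range coalescing done by "numa move_3D_matrix":
    # consecutive rows whose page spans touch are migrated in one call.
    PAGESIZE = 4096

    def page_span(addr, length):
        """Page-aligned [start, end] interval covering addr .. addr+length."""
        start = addr & ~(PAGESIZE - 1)
        end = ((start + length) & ~(PAGESIZE - 1)) + PAGESIZE - 1
        return start, end

    def coalesce(spans):
        """Merge overlapping page spans so move_page is called once per run."""
        merged, cur_start, cur_end = [], None, None
        for start, end in spans:
            if cur_start is None:
                cur_start, cur_end = start, end
            elif start <= cur_end:              # still inside the current run
                cur_end = max(cur_end, end)
            else:                               # gap: flush, start a new run
                merged.append((cur_start, cur_end))
                cur_start, cur_end = start, end
        if cur_start is not None:
            merged.append((cur_start, cur_end))
        return merged

    # Three 6000-byte rows at 0x1000, 0x2000 and 0x9000: the first two overlap
    # once rounded to pages and collapse into a single range.
    rows = [page_span(base, 6000) for base in (0x1000, 0x2000, 0x9000)]
    print(coalesce(rows))                       # [(4096, 16383), (36864, 45055)]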
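"numa spread_pages" locates the inferior's heap by scanning /proc/<pid>/maps for the [heap] mapping and converting the hex bounds. The same lookup, runnable against any process you own; inspecting the Python interpreter's own pid is an assumption of this example:

    # Sketch: find a process's heap the way "numa spread_pages" does.
    import os

    def heap_range(pid):
        with open("/proc/{}/maps".format(pid)) as fmap:
            for line in fmap:
                if line.rstrip().endswith("[heap]"):
                    bounds = line.split(" ", 1)[0]          # e.g. "00601000-00622000"
                    start, _, stop = bounds.partition("-")
                    return int(start, 16), int(stop, 16)
        return None

    rng = heap_range(os.getpid())
    if rng:
        start, stop = rng
        print("heap: {:#x}-{:#x} ({} KiB)".format(start, stop, (stop - start) // 1024))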
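move_page.c now accepts its destination argument as a plain node id "N", a range "A-B", or "A-B@INC", and deals consecutive pages out over that node range, wrapping back to the start when the increment overshoots the end. A Python rendering of the same grammar (the helper names are invented here) makes the wrap-around behaviour easy to check:

    # Sketch of the destination-node grammar accepted by move_page.c.
    from itertools import cycle, islice

    def parse_dest_nodes(spec):
        """Expand 'N', 'A-B' or 'A-B@INC' into the list of destination nodes."""
        if "-" not in spec:
            return [int(spec)]                  # plain node id
        node_range, _, inc = spec.partition("@")
        start, _, end = node_range.partition("-")
        return list(range(int(start), int(end) + 1, int(inc) if inc else 1))

    def dest_for_pages(spec, nb_pages):
        """Node assigned to each page, wrapping around like move_page.c does."""
        return list(islice(cycle(parse_dest_nodes(spec)), nb_pages))

    print(dest_for_pages("3", 4))       # [3, 3, 3, 3]
    print(dest_for_pages("0-3", 6))     # [0, 1, 2, 3, 0, 1]
    print(dest_for_pages("0-7@2", 5))   # [0, 2, 4, 6, 0]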
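The verification pass added at the end of move_page.c relies on a documented behaviour of move_pages(2): with a NULL nodes argument the call migrates nothing and instead writes the node each page currently resides on (or a negative errno, -2/ENOENT for unmapped pages) into status. A sketch of the same query from Python via ctypes; the libnuma.so.1 name, the self-inspection of the current process, and the error handling are assumptions of the sketch:

    # Sketch: ask the kernel where the pages backing a buffer currently live,
    # the same query move_page.c performs after migrating.  Assumes libnuma
    # is installed; on a non-NUMA machine every page simply reports node 0.
    import ctypes

    PAGESIZE = 4096                       # same constant the patch hard-codes

    def page_nodes(pid, addr, length):
        libnuma = ctypes.CDLL("libnuma.so.1", use_errno=True)
        first = addr & ~(PAGESIZE - 1)                    # round down to a page
        count = (addr + length - first + PAGESIZE - 1) // PAGESIZE
        pages = (ctypes.c_void_p * count)(*[first + i * PAGESIZE for i in range(count)])
        status = (ctypes.c_int * count)()
        # nodes=NULL: move nothing, just report each page's node in status.
        if libnuma.move_pages(pid, ctypes.c_ulong(count), pages, None, status, 0) != 0:
            raise OSError(ctypes.get_errno(), "move_pages failed")
        return list(status)    # node id per page, or -errno (-2: not mapped yet)

    buf = ctypes.create_string_buffer(4 * PAGESIZE)
    print(page_nodes(0, ctypes.addressof(buf), len(buf)))   # pid 0 == this process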