# synthesize.py
import json
import os
import sys
import numpy
import argparse
import math
parser = argparse.ArgumentParser(description = "A Python script that generates a table of results from a given entries file.")
parser.add_argument("entries", help = "A JSON file containing entries.")
args = parser.parse_args()
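# Typical invocation (the path below is only illustrative):
#   python synthesize.py benchmarks/entries.json
# The resulting table.json is written next to the entries file (see the end of
# this script).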
# Recovering data
assert os.path.exists(args.entries), "Specify a valid path to the entries file"
with open(args.entries, "r") as entries_file:
    try:
        entries = json.load(entries_file)
        header_dict = entries["header"]
        data_list = entries["data_list"]
    except (json.JSONDecodeError, KeyError):
        print("Wrong file format")
        sys.exit(1)
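# Expected shape of the entries file, as inferred from the keys read above and
# the traversal functions below (actual files may carry more fields):
# {
#   "header": {"val": ..., "children": [{"val": ..., "children": [...]}, ...]},
#   "data_list": ["<dataset name>", ...]
# }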
# Output format functions
def time_format(t):
    t = float(t)
    if t == 0:
        return ""
    elif t < 0.001:
        return "$<$ 1 ms"
    elif t < 10:
        return str(math.ceil(t*1000)) + " ms"
    elif t < 600:
        return str(int(t)) + " s"
    elif t < 3600:
        return str(int(t/60)) + " min"
    else:
        return str(int(t/3600)) + " h"
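# Illustrative outputs of time_format (computed from the thresholds above):
#   time_format(0.0005) -> "$<$ 1 ms"    time_format(2.3)  -> "2300 ms"
#   time_format(75)     -> "75 s"        time_format(7200) -> "2 h"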
def time_format_2(t):
    t = float(t)
    if t == 0:
        return ""
    elif t < 0.1:
        return "$<$ 0.1"
    elif t < 10:
        return "{0:.1f}".format(t)
    elif t < 600:
        return str(int(t))
    elif t < 3600:
        return str(int(t/60)) + " min"
    else:
        return str(int(t/3600)) + " h"
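# Illustrative outputs of time_format_2 (seconds are left implicit below 10 min):
#   time_format_2(0.05) -> "$<$ 0.1"    time_format_2(2.34)   -> "2.3"
#   time_format_2(42)   -> "42"         time_format_2(1234.5) -> "20 min"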
def steps_format(N):
    if N == 0:
        return ""
    elif N < 10_000:
        return str(math.ceil(N))
    else:
        return str(math.ceil(N/1_000.0)) + "\\,k"
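# Illustrative outputs of steps_format ("\,k" marks thousands in the LaTeX output):
#   steps_format(512) -> "512"    steps_format(12_500) -> "13\,k"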
def steps_format_2(N):
    if N == 0:
        return ""
    elif N < 100:
        return "$<$ 0.01"
    elif N < 1_000:
        return "{0:.2f}".format(N/1_000.0)
    elif N < 10_000:
        return "{0:.1f}".format(N/1_000.0)
    else:
        return str(math.ceil(N/1_000.0))
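# Illustrative outputs of steps_format_2 (values scaled to thousands):
#   steps_format_2(450) -> "0.45"    steps_format_2(4_500)  -> "4.5"
#   steps_format_2(45_000) -> "45"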
def bytes_format(N):
    if N == 0:
        return ""
    elif N < 2**10:
        return str(N)
    elif N < 2**20:
        return str(math.ceil(float(N)/2**10)) + " KB"
    elif N < 2**30:
        return str(math.ceil(float(N)/2**20)) + " MB"
    else:
        return str(math.ceil(float(N)/2**30)) + " GB"
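# Illustrative outputs of bytes_format (binary thresholds, decimal-style labels):
#   bytes_format(512) -> "512"    bytes_format(3 * 2**20) -> "3 MB"
#   bytes_format(5 * 2**30) -> "5 GB"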
# Header function
def fusion(LT1, LT2):
    if LT1 == []:
        return LT2
    elif LT2 == []:
        return LT1
    else:
        m = len(LT1)
        n = len(LT2)
        LT1 += [
            [{"str": "", "w": node["w"]} for node in LT1[-1]]
            for _ in range(n - m)
        ]
        LT2 += [
            [{"str": "", "w": node["w"]} for node in LT2[-1]]
            for _ in range(m - n)
        ]
        return [l1 + l2 for l1, l2 in zip(LT1, LT2)]
def _header(header_dict):
    res = []
    if header_dict["children"]:
        for child in header_dict["children"]:
            res = fusion(res, _header(child))
    if res == []:
        res.append([{"str": header_dict["val"], "w": 1}])
    else:
        res.append([{"str": header_dict["val"], "w": len(res[0])}])
    return res
def header(header_dict):
    res = _header(header_dict)[:-1]
    res.reverse()
    return res
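# Sketch of the row layout produced by header() on a small, purely illustrative
# tree: a root node (whose own row is discarded by header) with children
# "name" (a leaf) and "algpath" (with leaf children "tottime" and "totsteps").
# _header builds rows from the leaves up, fusion pads the shallower branch with
# empty cells, and header() drops the root row and reverses the order, giving:
#   [[{"str": "", "w": 1}, {"str": "algpath", "w": 2}],
#    [{"str": "name", "w": 1}, {"str": "tottime", "w": 1}, {"str": "totsteps", "w": 1}]]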
# Table function
def get_leaves(D):
    L = []
    if D["children"] == []:
        L = [str(D["val"])]
    else:
        for child in D["children"]:
            L += get_leaves(child)
    return L
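# get_leaves collects the column keys below a header node; with the sketch
# above, get_leaves on the "algpath" node would give ["tottime", "totsteps"].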
def degrees(data):
    name = data.split("_")[1].split("-")[0]
    auxi = [int(d) for d in data.split("_")[1].split("-")[1:]]
    if name == "dense":
        return auxi
    elif name == "structured":
        return [auxi[1] for _ in range(auxi[0])]
    elif name == "katsura":
        return [1] + [2 for _ in range(auxi[0])]
    else:
        return [-1]
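# Dataset names are parsed as "<prefix>_<family>-<parameters>" (the prefix also
# feeds the "htype" statistic below).  Illustrative names, not actual benchmarks:
#   degrees("linear_dense-2-3-4")    -> [2, 3, 4]
#   degrees("linear_structured-3-2") -> [2, 2, 2]
#   degrees("newton_katsura-4")      -> [1, 2, 2, 2, 2]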
# Formulas for the different statistics to compute
data_stats = {
    "raw name": lambda data: data.replace("_", "\\_"),
    "name": lambda data: data.split("_")[1].split("-")[0] + " *"*("all" not in data) + " \\textsuperscript{N}"*("newton" in data),
    "dimension": lambda data: len(json.load(open(f"data/{data}.json", "r"))["system"]),
    "max deg": lambda data: int(max(degrees(data))),
    "paths": lambda data: len(json.load(open(f"data/{data}.json", "r"))["fiber"]),
    "f": lambda data: json.load(open(f"benchmarks/{data}/algpath/out.json", "r"))["instructions"]["f"],
    "df": lambda data: json.load(open(f"benchmarks/{data}/algpath/out.json", "r"))["instructions"]["df"],
    "bezout": lambda data: str(numpy.prod(degrees(data))),
    "medmean hc algpath": lambda data: '{0:.2f}'.format(
        numpy.median([int(p) for p in json.load(open(f"benchmarks/{data}/algpath/out.json", "r"))["steplist"] if p is not None])
        / numpy.median([int(p) for p in json.load(open(f"benchmarks/{data}/homotopycontinuation/out.json", "r"))["steplist"] if p is not None])
    ),
    "htype": lambda data: "N" if data.split("_")[0] == "newton" else ("T" if data.split("_")[0] == "linear" else "?")
}
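# The statistics above read two kinds of per-dataset files: data/<data>.json
# (keys "system" and "fiber") and benchmarks/<data>/<package>/out.json (key
# "instructions", among others); result_stats below consumes the remaining
# out.json fields.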
result_stats = {
    "failures": lambda res: str(res["failures"]) if res["failures"] != 0 else "",
    "tottime": lambda res: time_format_2(res["overheadtime"]) if "overheadtime" in res else time_format_2(res["time"]),
    "meantime": lambda res: time_format(float(res["time"])/len(res["steplist"])),
    "totsteps": lambda res: steps_format(sum([int(p) for p in res["steplist"] if p is not None])),
    "stepseries": lambda res: res["steplist"],
    "stepspersec": lambda res: steps_format_2(float(sum([int(p) for p in res["steplist"] if p is not None]))/float(res["time"])),
    "timeperstep": lambda res: '{0:.2e}'.format(1000*float(res["time"])/float(sum([int(p) for p in res["steplist"] if p is not None]))),
    "meansteps": lambda res: steps_format(round(numpy.mean([int(p) for p in res["steplist"] if p is not None]), 1)),
    "stdsteps": lambda res: steps_format(round(numpy.std([int(p) for p in res["steplist"] if p is not None]), 1)),
    "medsteps": lambda res: steps_format(numpy.median([int(p) for p in res["steplist"] if p is not None])),
    "minsteps": lambda res: steps_format(min([int(p) for p in res["steplist"] if p is not None])),
    "maxsteps": lambda res: steps_format(max([int(p) for p in res["steplist"] if p is not None])),
    "q1steps": lambda res: steps_format(round(numpy.quantile([int(p) for p in res["steplist"] if p is not None], 0.25), 1)),
    "q3steps": lambda res: steps_format(round(numpy.quantile([int(p) for p in res["steplist"] if p is not None], 0.75), 1))
}
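# Each res is one package's benchmarks/<data>/<package>/out.json; the fields
# consumed above are "failures", "time", the optional "overheadtime", and
# "steplist" (per-path step counts; the None entries filtered out above
# presumably correspond to failed paths).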
def _table(data, branch, node):
    key = node["val"]
    # Data stats
    if key in data_stats:
        try:
            return [{"str": data_stats[key](data), "w": 1}]
        except Exception:
            return [{"str": "", "w": 1}]
    # Local stats
    if key in result_stats:
        pkg_name = branch[0]
        results = json.load(open(f"benchmarks/{data}/{pkg_name}/out.json", "r"))
        try:
            return [{"str": result_stats[key](results), "w": 1}]
        except Exception:
            return [{"str": "", "w": 1}]
    # Packages handling
    if key in ["homotopycontinuation", "algpath", "macaulay2", "sirocco"]:
        pkg_dir = f"benchmarks/{data}/{key}/"
        if not os.path.exists(pkg_dir):
            return [{"str": "not benchmarked", "w": len(get_leaves(node))}]
        info_dict = json.load(open(f"{pkg_dir}info.json"))
        if info_dict["timeout error"]:
            timeout = info_dict["timeout"]
            return [{"str": f"$>$ {time_format(timeout)}", "w": len(get_leaves(node))}]
        if info_dict["memory error"]:
            memory = info_dict["memory"]
            return [{"str": f"$>$ {bytes_format(memory)}", "w": len(get_leaves(node))}]
        if info_dict["script error"]:
            return [{"str": "script error", "w": len(get_leaves(node))}]
        if info_dict["killed manually"]:
            return [{"str": "killed", "w": len(get_leaves(node))}]
    # By default, compute the table recursively
    # This is the case for the following node values: "instructions"
    return sum([_table(data, [key] + branch, child) for child in node["children"]], [])
def table(data, node):
    return _table(data, [], node)
# Computing the table and writing it to a .json file
table_dict = {"header": header(header_dict), "table": [table(data, header_dict) for data in data_list]}
with open(os.path.join(os.path.dirname(args.entries), "table.json"), "w") as table_file:
    json.dump(table_dict, table_file, indent = 2)
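# A downstream script could turn table.json into LaTeX rows roughly as sketched
# below (hypothetical consumer, not part of this script; cell strings are
# already LaTeX-ready and "w" is the column span):
#
#   table_dict = json.load(open("table.json", "r"))
#   for row in table_dict["header"] + table_dict["table"]:
#       cells = [r"\multicolumn{%d}{c}{%s}" % (cell["w"], cell["str"]) for cell in row]
#       print(" & ".join(cells) + r" \\")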