# synthesize.py
import json
import os
import sys
import numpy
import argparse
import math
parser = argparse.ArgumentParser(description = "A Python script that generates a table of results from a given entries file.")
parser.add_argument("entries", help = "A JSON file containing entries.")
args = parser.parse_args()
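# Typical invocation (the path below is only illustrative):
#   python synthesize.py benchmarks/entries.json
# The resulting table.json is written next to the entries file (see the end of
# this script).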
# Recovering data
assert os.path.exists(args.entries), "Specify a valid path to the entries file"
with open(args.entries, "r") as entries_file:
    try:
        entries = json.load(entries_file)
        header_dict = entries["header"]
        data_list = entries["data_list"]
    except (json.JSONDecodeError, KeyError):
        print("Wrong file format")
        sys.exit(1)
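# Expected shape of the entries file, as inferred from the keys read above and
# the traversal functions below (actual files may carry more fields):
# {
#   "header": {"val": ..., "children": [{"val": ..., "children": [...]}, ...]},
#   "data_list": ["<dataset name>", ...]
# }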
# Output format functions
def time_format(t):
    t = float(t)
    if t == 0:
        return ""
    elif t < 0.001:
        return "$<$ 1 ms"
    elif t < 10:
        return str(math.ceil(t*1000)) + " ms"
    elif t < 600:
        return str(int(t)) + " s"
    elif t < 3600:
        return str(int(t/60)) + " min"
    else:
        return str(int(t/3600)) + " h"
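# Illustrative outputs of time_format (computed from the thresholds above):
#   time_format(0.0005) -> "$<$ 1 ms"    time_format(2.3)  -> "2300 ms"
#   time_format(75)     -> "75 s"        time_format(7200) -> "2 h"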
def time_format_2(t):
    t = float(t)
    if t == 0:
        return ""
    elif t < 0.1:
        return "$<$ 0.1"
    elif t < 10:
        return "{0:.1f}".format(t)
    elif t < 600:
        return str(int(t))
    elif t < 3600:
        return str(int(t/60)) + " min"
    else:
        return str(int(t/3600)) + " h"
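# Illustrative outputs of time_format_2 (seconds are left implicit below 10 min):
#   time_format_2(0.05) -> "$<$ 0.1"    time_format_2(2.34)   -> "2.3"
#   time_format_2(42)   -> "42"         time_format_2(1234.5) -> "20 min"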
def steps_format(N):
    if N == 0:
        return ""
    elif N < 10_000:
        return str(math.ceil(N))
    else:
        return str(math.ceil(N/1_000.0)) + "\\,k"
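# Illustrative outputs of steps_format ("\,k" marks thousands in the LaTeX output):
#   steps_format(512) -> "512"    steps_format(12_500) -> "13\,k"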
def steps_format_2(N):
    if N == 0:
        return ""
    elif N < 100:
        return "$<$ 0.01"
    elif N < 1_000:
        return "{0:.2f}".format(N/1_000.0)
    elif N < 10_000:
        return "{0:.1f}".format(N/1_000.0)
    else:
        return str(math.ceil(N/1_000.0))
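# Illustrative outputs of steps_format_2 (values scaled to thousands):
#   steps_format_2(450) -> "0.45"    steps_format_2(4_500)  -> "4.5"
#   steps_format_2(45_000) -> "45"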
def bytes_format(N):
    if N == 0:
        return ""
    elif N < 2**10:
        return str(N)
    elif N < 2**20:
        return str(math.ceil(float(N)/2**10)) + " KB"
    elif N < 2**30:
        return str(math.ceil(float(N)/2**20)) + " MB"
    else:
        return str(math.ceil(float(N)/2**30)) + " GB"
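# Illustrative outputs of bytes_format (binary thresholds, decimal-style labels):
#   bytes_format(512) -> "512"    bytes_format(3 * 2**20) -> "3 MB"
#   bytes_format(5 * 2**30) -> "5 GB"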
# Header function
def fusion(LT1, LT2):
    if LT1 == []:
        return LT2
    elif LT2 == []:
        return LT1
    else:
        m = len(LT1)
        n = len(LT2)
        LT1 += [
            [{"str": "", "w": node["w"]} for node in LT1[-1]]
            for _ in range(n - m)
        ]
        LT2 += [
            [{"str": "", "w": node["w"]} for node in LT2[-1]]
            for _ in range(m - n)
        ]
        return [l1 + l2 for l1, l2 in zip(LT1, LT2)]
def _header(header_dict):
    res = []
    if header_dict["children"]:
        for child in header_dict["children"]:
            res = fusion(res, _header(child))
    if res == []:
        res.append([{"str": header_dict["val"], "w": 1}])
    else:
        res.append([{"str": header_dict["val"], "w": len(res[0])}])
    return res
def header(header_dict):
    res = _header(header_dict)[:-1]
    res.reverse()
    return res
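# Sketch of the row layout produced by header() on a small, purely illustrative
# tree: a root node (whose own row is discarded by header) with children
# "name" (a leaf) and "algpath" (with leaf children "tottime" and "totsteps").
# _header builds rows from the leaves up, fusion pads the shallower branch with
# empty cells, and header() drops the root row and reverses the order, giving:
#   [[{"str": "", "w": 1}, {"str": "algpath", "w": 2}],
#    [{"str": "name", "w": 1}, {"str": "tottime", "w": 1}, {"str": "totsteps", "w": 1}]]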
# Table function
def get_leaves(D):
    L = []
    if D["children"] == []:
        L = [str(D["val"])]
    else:
        for child in D["children"]:
            L += get_leaves(child)
    return L
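# get_leaves collects the column keys below a header node; with the sketch
# above, get_leaves on the "algpath" node would give ["tottime", "totsteps"].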
def degrees(data):
    name = data.split("_")[1].split("-")[0]
    auxi = [int(d) for d in data.split("_")[1].split("-")[1:]]
    if name == "dense":
        return auxi
    elif name == "structured":
        return [auxi[1] for _ in range(auxi[0])]
    elif name == "katsura":
        return [1] + [2 for _ in range(auxi[0])]
    else:
        return [-1]
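# Dataset names are parsed as "<prefix>_<family>-<parameters>" (the prefix also
# feeds the "htype" statistic below).  Illustrative names, not actual benchmarks:
#   degrees("linear_dense-2-3-4")    -> [2, 3, 4]
#   degrees("linear_structured-3-2") -> [2, 2, 2]
#   degrees("newton_katsura-4")      -> [1, 2, 2, 2, 2]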
# Formulas for the different statistics to compute
data_stats = {
    "raw name": lambda data: data.replace("_", "\\_"),
    "name": lambda data: data.split("_")[1].split("-")[0] + " *"*("all" not in data) + " \\textsuperscript{N}"*("newton" in data),
    "dimension": lambda data: len(json.load(open(f"data/{data}.json", "r"))["system"]),
    "max deg": lambda data: int(max(degrees(data))),
    "paths": lambda data: len(json.load(open(f"data/{data}.json", "r"))["fiber"]),
    "f": lambda data: json.load(open(f"benchmarks/{data}/algpath/out.json", "r"))["instructions"]["f"],
    "df": lambda data: json.load(open(f"benchmarks/{data}/algpath/out.json", "r"))["instructions"]["df"],
    "bezout": lambda data: str(numpy.prod(degrees(data))),
    "medmean hc algpath": lambda data: '{0:.2f}'.format(
        numpy.median([int(p) for p in json.load(open(f"benchmarks/{data}/algpath/out.json", "r"))["steplist"] if p is not None])
        / numpy.median([int(p) for p in json.load(open(f"benchmarks/{data}/homotopycontinuation/out.json", "r"))["steplist"] if p is not None])
    ),
    "htype": lambda data: "N" if data.split("_")[0] == "newton" else ("T" if data.split("_")[0] == "linear" else "?")
}
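# The statistics above read two kinds of per-dataset files: data/<data>.json
# (keys "system" and "fiber") and benchmarks/<data>/<package>/out.json (key
# "instructions", among others); result_stats below consumes the remaining
# out.json fields.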
result_stats = {
    "failures": lambda res: str(res["failures"]) if res["failures"] != 0 else "",
    "tottime": lambda res: time_format_2(res["overheadtime"]) if "overheadtime" in res else time_format_2(res["time"]),
    "meantime": lambda res: time_format(float(res["time"])/len(res["steplist"])),
    "totsteps": lambda res: steps_format(sum([int(p) for p in res["steplist"] if p is not None])),
    "stepseries": lambda res: res["steplist"],
    "stepspersec": lambda res: steps_format_2(float(sum([int(p) for p in res["steplist"] if p is not None]))/float(res["time"])),
    "timeperstep": lambda res: '{0:.2e}'.format(1000*float(res["time"])/float(sum([int(p) for p in res["steplist"] if p is not None]))),
    "meansteps": lambda res: steps_format(round(numpy.mean([int(p) for p in res["steplist"] if p is not None]), 1)),
    "stdsteps": lambda res: steps_format(round(numpy.std([int(p) for p in res["steplist"] if p is not None]), 1)),
    "medsteps": lambda res: steps_format(numpy.median([int(p) for p in res["steplist"] if p is not None])),
    "minsteps": lambda res: steps_format(min([int(p) for p in res["steplist"] if p is not None])),
    "maxsteps": lambda res: steps_format(max([int(p) for p in res["steplist"] if p is not None])),
    "q1steps": lambda res: steps_format(round(numpy.quantile([int(p) for p in res["steplist"] if p is not None], 0.25), 1)),
    "q3steps": lambda res: steps_format(round(numpy.quantile([int(p) for p in res["steplist"] if p is not None], 0.75), 1))
}
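# Each res is one package's benchmarks/<data>/<package>/out.json; the fields
# consumed above are "failures", "time", the optional "overheadtime", and
# "steplist" (per-path step counts; the None entries filtered out above
# presumably correspond to failed paths).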
def _table(data, branch, node):
    key = node["val"]
    # Data stats
    if key in data_stats:
        try:
            return [{"str": data_stats[key](data), "w": 1}]
        except Exception:
            return [{"str": "", "w": 1}]
    # Local stats
    if key in result_stats:
        pkg_name = branch[0]
        results = json.load(open(f"benchmarks/{data}/{pkg_name}/out.json", "r"))
        try:
            return [{"str": result_stats[key](results), "w": 1}]
        except Exception:
            return [{"str": "", "w": 1}]
    # Packages handling
    if key in ["homotopycontinuation", "algpath", "macaulay2", "sirocco"]:
        pkg_dir = f"benchmarks/{data}/{key}/"
        if not os.path.exists(pkg_dir):
            return [{"str": "not benchmarked", "w": len(get_leaves(node))}]
        info_dict = json.load(open(f"{pkg_dir}info.json"))
        if info_dict["timeout error"]:
            timeout = info_dict["timeout"]
            return [{"str": f"$>$ {time_format(timeout)}", "w": len(get_leaves(node))}]
        if info_dict["memory error"]:
            memory = info_dict["memory"]
            return [{"str": f"$>$ {bytes_format(memory)}", "w": len(get_leaves(node))}]
        if info_dict["script error"]:
            return [{"str": "script error", "w": len(get_leaves(node))}]
        if info_dict["killed manually"]:
            return [{"str": "killed", "w": len(get_leaves(node))}]
    # By default, compute the table recursively
    # This is the case for the following node values: "instructions"
    return sum([_table(data, [key] + branch, child) for child in node["children"]], [])
def table(data, node):
    return _table(data, [], node)
# Computing the table and writing it to a .json file
table_dict = {"header": header(header_dict), "table": [table(data, header_dict) for data in data_list]}
with open(os.path.join(os.path.dirname(args.entries), "table.json"), "w") as table_file:
    json.dump(table_dict, table_file, indent = 2)
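# A downstream script could turn table.json into LaTeX rows roughly as sketched
# below (hypothetical consumer, not part of this script; cell strings are
# already LaTeX-ready and "w" is the column span):
#
#   table_dict = json.load(open("table.json", "r"))
#   for row in table_dict["header"] + table_dict["table"]:
#       cells = [r"\multicolumn{%d}{c}{%s}" % (cell["w"], cell["str"]) for cell in row]
#       print(" & ".join(cells) + r" \\")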