Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 71e25551 authored by GRUBER Fabian's avatar GRUBER Fabian
Browse files

Wrote scripts to solve port-mapping ILP problem.

cherry-pick of several intermediate commits.
  00685293 from Thu Jul 4 11:00:48 2019 +0200.
  9386c13a from Thu May 16 08:09:25 2019 +0200
  3813e316 from Fri May 10 16:56:23 2019 +0200
  50ee0275 from Sun May 5 10:19:47 2019 +0200
  0b54af76 from Sat May 4 20:04:48 2019 +0200
  d07f97d1 from Sat May 4 15:30:37 2019 +0200
  e924c740 from Sat May 4 15:21:27 2019 +0200
  e3138dad from Sat May 4 14:00:44 2019 +0200
  a58a53c1 from Fri May 3 16:42:26 2019 +0200

ILP model: save latest version.

Many a long night was spent hacking on this.
Deadlines were tight.
Midnight oil was burnt.
So I didn't commit intermediate stages.
parent c2bc8b3c
Branches
No related tags found
No related merge requests found
...@@ -33,6 +33,7 @@ from pipedream.utils import chunks, nub ...@@ -33,6 +33,7 @@ from pipedream.utils import chunks, nub
import pipedream.asm.ir as ir import pipedream.asm.ir as ir
import pipedream.benchmark.common as common import pipedream.benchmark.common as common
import pipedream.utils.yaml as yaml import pipedream.utils.yaml as yaml
import pipedream.utils.json as json
BLACKLISTED_INSTRUCTIONS = frozenset([ BLACKLISTED_INSTRUCTIONS = frozenset([
# 'OR_GPR16i16_IMMi16', # 'OR_GPR16i16_IMMi16',
...@@ -77,7 +78,7 @@ def main(): ...@@ -77,7 +78,7 @@ def main():
# inputs # inputs
subp.add_argument('--measurements', subp.add_argument('--measurements',
dest='measurements_file', dest='measurements_input',
required=True, required=True,
type=pathlib.Path, type=pathlib.Path,
help='File to read benchmark results from',) help='File to read benchmark results from',)
...@@ -93,7 +94,11 @@ def main(): ...@@ -93,7 +94,11 @@ def main():
# outputs # outputs
subp.add_argument('-eo', '--eq-class-output', type=pathlib.Path, default=pathlib.Path('/dev/stdout')) subp.add_argument('-eo', '--eq-class-output', type=pathlib.Path, default=pathlib.Path('/dev/stdout'))
subp.add_argument('-mo', '--measurement-output', type=pathlib.Path, default=None) subp.add_argument('-mo', '--measurements-output',
dest='measurements_output',
required=False,
type=pathlib.Path,
help='File to write benchmark results to',)
subp.add_argument('--yaml-log', default=None, type=pathlib.Path) subp.add_argument('--yaml-log', default=None, type=pathlib.Path)
subp.add_argument('--json-log', default=None, type=pathlib.Path) subp.add_argument('--json-log', default=None, type=pathlib.Path)
...@@ -147,17 +152,17 @@ def main(): ...@@ -147,17 +152,17 @@ def main():
# inputs # inputs
subp.add_argument('--measurements', subp.add_argument('--measurements',
dest='measurements_file', dest='measurements_input',
required=True, required=True,
type=pathlib.Path, type=pathlib.Path,
help='File to read benchmark results from and write them back to',) help='File to read benchmark results from',)
subp.add_argument('--eq-classes', subp.add_argument('--eq-classes',
dest='eq_classes_file', dest='eq_classes_file',
required=True, required=True,
type=pathlib.Path, type=pathlib.Path,
help='File to read equivalence classes from',) help='File to read equivalence classes from',)
subp.add_argument('--tag', subp.add_argument('--tag',
default=None, required=True,
type=str, type=str,
help='Tag selector to select complex instructions',) help='Tag selector to select complex instructions',)
subp.add_argument('--min-muops', subp.add_argument('--min-muops',
...@@ -171,6 +176,13 @@ def main(): ...@@ -171,6 +176,13 @@ def main():
with a IPC/MPC stddev higher than this are ignored. with a IPC/MPC stddev higher than this are ignored.
""",) """,)
## outputs
subp.add_argument('-mo', '--measurements-output',
dest='measurements_output',
required=False,
type=pathlib.Path,
help='File to write benchmark results to',)
############################################################################## ##############################################################################
args = parser.parse_args() args = parser.parse_args()
...@@ -180,11 +192,15 @@ def main(): ...@@ -180,11 +192,15 @@ def main():
if command is None: if command is None:
parser.error('must supply a command') parser.error('must supply a command')
if args.measurements_output is None:
args.measurements_output = args.measurements_input
command(**vars(args)) command(**vars(args))
def generate_simple_ilp_input(*, def generate_simple_ilp_input(*,
measurements_file: pathlib.Path, measurements_input: pathlib.Path,
measurements_output: pathlib.Path,
eq_classes_file: pathlib.Path, eq_classes_file: pathlib.Path,
tag: ty.Optional[str], tag: ty.Optional[str],
min_muops: int, min_muops: int,
...@@ -193,21 +209,23 @@ def generate_simple_ilp_input(*, ...@@ -193,21 +209,23 @@ def generate_simple_ilp_input(*,
arch = ir.Architecture.for_name('x86') arch = ir.Architecture.for_name('x86')
instruction_set = arch.instruction_set() instruction_set = arch.instruction_set()
measurements = Benchmark_Run_Summary_Aggregator( measurements = Benchmark_Run_Aggregator(
max_stddev=max_stddev, max_stddev=max_stddev,
# *0.9 to account for dropping the bottom & top 5 percent. # *0.9 to account for dropping the bottom & top 5 percent.
min_samples=MIN_NUM_SAMPLES * 0.9, min_samples=MIN_NUM_SAMPLES * 0.9,
) )
# touch measurements file
open(measurements_output, 'a').close()
try: try:
read_measurements_from_files(measurements, [measurements_file]) read_measurements_from_files(measurements, [measurements_input])
eq_classes = read_equivalence_classes_from_file(instruction_set, eq_classes_file) eq_classes = read_equivalence_classes_from_file(instruction_set, eq_classes_file)
except FileNotFoundError as e: except FileNotFoundError as e:
print('error:', e, file=sys.stderr) print('error:', e, file=sys.stderr)
exit(1) exit(1)
complex_insts = common.glob_instruction_tags(arch, [tag]) complex_insts = list(nub(common.glob_instruction_tags(arch, [tag])))
# representatives = [random.choice(eq) for eq in eq_classes]
representatives = list(nub(eq[0] for eq in eq_classes)) representatives = list(nub(eq[0] for eq in eq_classes))
if not complex_insts: if not complex_insts:
...@@ -234,108 +252,120 @@ def generate_simple_ilp_input(*, ...@@ -234,108 +252,120 @@ def generate_simple_ilp_input(*,
ordering = Ordering.ALPHABETIC, ordering = Ordering.ALPHABETIC,
) )
EQ.make_measurements(kernels, measurements, measure_ports, output=measurements_file) kernels = [k for k in kernels if tuple(i.name for i in k) not in measurements]
log('MEASURE: muI') CHUNK_SIZE = 2000
make_measurements(nub((i,) for i in complex_insts + representatives), measure_ports=True)
assert complex_insts[0].name in measurements for chunk in chunks(kernels, CHUNK_SIZE):
try:
EQ.make_measurements(chunk, measurements, measure_ports=measure_ports, output=measurements_output)
finally:
os.sched_yield()
if min_muops: log('MEASURE: complexI', f'({len(complex_insts)})')
for i in list(complex_insts): make_measurements([(i,) for i in complex_insts], measure_ports=True)
m = measurements[i.name]
num_muops = round(m.muops.mean / m.instructions.mean) complex_insts = [i for i in complex_insts if measurements[i.name].num_unfused_muops > 1]
if num_muops < min_muops: if not complex_insts:
complex_insts.remove(i) log('no real complexI found')
return
kernels = list(combination_kernels(complex_insts, representatives)) log('MEASURE: muI ', f'({len(representatives)})')
log('MEASURE: muI x complexI', f'({len(kernels)})') make_measurements([(i,) for i in representatives], measure_ports=True)
for chunk in chunks(kernels, 1000):
make_measurements(chunk, measure_ports=False)
del kernels def combinations_of_instructions_using_only_ports(ports: ty.FrozenSet['Port']) -> ty.Iterable[ty.Tuple[ir.Instruction,
...]]:
port_sets = set(frozenset(ps) for ps in powerset(ports))
port_sets.remove(frozenset())
## higher order kernels assert frozenset([6]) in port_sets
kernels = [] assert len([i for i in representatives if i.name == 'JMP_0'])
if False: # NEW WAY, TODO IMPLEMENT combinations = collections.defaultdict(list)
for complexI in complex_insts:
mCI = measurements[complexI.name]
if MEASURE_PORTS: # FIXME: we really want a set cover for every possible subset of ports ... but that is expensive
## TODO: read ports from args or partial model for N in range(1, 8):
ports = mCI.ports_used() for kernel in itertools.combinations(representatives, N):
else: kernel_ports = frozenset()
interferes_with = set()
for muI in representatives: for inst in kernel:
mMUI = measurements[muI.name] run = measurements[inst.name]
mCI_MUI = measurements[complexI.name, muI.name]
# no interference iff MPC(A) + MPC(B) == MPC(A B) kernel_ports |= run.ports_used()
if abs((mc.mpc.mean + mr.mpc.mean) - mcr.mpc.mean) <= 0.1:
continue
interferes_with.add(muI) # if kernel_ports in combinations:
# kernel = min(kernel, combinations[kernel_ports], key=len)
ports = set() # combinations[kernel_ports] = tuple(sorted(kernel, key=lambda i: i.name))
for muI in interferes_with: combinations[kernel_ports].append(tuple(sorted(kernel, key=lambda i: i.name)))
muI_ports = ... # TODO: read partial model
ports |= muI_ports # if not port_sets:
# break
for port_set in powerset(ports): if False:
kernel = minimal_combination_of_instructions_using_only_port_set(port_set) print('COVERED ' + str(ports) + ':')
for port_set in sorted(combinations, key=lambda ps: [len(ps), ps]):
for kernel in combinations[port_set]:
print(' ', Benchmark_Spec.name_from_instruction_names(i.name for i in kernel), *sorted(port_set))
print('UNCOVERED ' + str(ports) + ':')
for port_set in sorted(port_sets, key=lambda ps: [len(ps), ps]):
if port_set not in combinations:
print(' ', *sorted(port_set))
print()
exit()
kernels.append(kernel) # print('MAX', max(map(len, combinations.values())))
kernels.append((complexI,) + kernel)
else:
interferes_with = collections.defaultdict(set)
for c in complex_insts: out = []
for r in representatives:
if c is r:
continue
mc = measurements[c.name]
mr = measurements[r.name]
if mc.ports_used() and mr.ports_used(): for port_set in sorted(combinations, key=lambda ps: [len(ps), ps]):
if not (mc.ports_used() & mr.ports_used()): kernels = combinations[port_set]
# does not use same ports. no interference kernels = sorted(kernels, key=len)[:3]
continue
else:
mcr = measurements[c.name, r.name]
# no interference iff MPC(A) + MPC(B) == MPC(A B) out += kernels
if abs((mc.mpc.mean + mr.mpc.mean) - mcr.mpc.mean) <= 0.1:
continue
interferes_with[c].add(r) return sorted(out, key=len)
for c, interferes in interferes_with.items(): assert complex_insts[0].name in measurements
mc = measurements[c.name]
num_muops = round(mc.muops.mean / mc.instructions.mean) if min_muops:
for i in list(complex_insts):
m = measurements[i.name]
for combi in combination_kernels([c], interferes, interferes): num_muops = round(m.unfused_muops.mean / m.instructions.mean)
kernels.append(combi)
log('MEASURE: muI x muI x complexI', f'({len(kernels)})') if num_muops < min_muops:
complex_insts.remove(i)
## higher order kernels
kernels = []
for chunk in chunks(kernels, 1000): for complexI in complex_insts:
make_measurements(chunk, measure_ports=False) for combi in combination_kernels([complexI], representatives):
kernels.append(combi)
log('MEASURE: muI x complexI', f'({len(kernels)})')
make_measurements(kernels, measure_ports=True)
kernels = []
for complexI in complex_insts:
for combi in combination_kernels([complexI], representatives, representatives):
kernels.append(combi)
log('MEASURE: muI x muI x complexI', f'({len(kernels)})')
make_measurements(kernels, measure_ports=True)
def find_equivalence_classes(*, def find_equivalence_classes(*,
measurements_file: ty.Optional[pathlib.Path], measurements_input: ty.Optional[pathlib.Path],
eq_classes_file: ty.Optional[pathlib.Path], eq_classes_file: ty.Optional[pathlib.Path],
tag: ty.Optional[str], tag: ty.Optional[str],
eq_class_output: ty.Optional[pathlib.Path], eq_class_output: ty.Optional[pathlib.Path],
measurement_output: ty.Optional[pathlib.Path], measurements_output: ty.Optional[pathlib.Path],
yaml_log: ty.Optional[pathlib.Path], yaml_log: ty.Optional[pathlib.Path],
json_log: ty.Optional[pathlib.Path], json_log: ty.Optional[pathlib.Path],
num_representatives: int, num_representatives: int,
...@@ -360,13 +390,13 @@ def find_equivalence_classes(*, ...@@ -360,13 +390,13 @@ def find_equivalence_classes(*,
common.set_process_name('pipedream-equivalence-classes') common.set_process_name('pipedream-equivalence-classes')
common.set_scheduler_params() common.set_scheduler_params()
measurements = Benchmark_Run_Summary_Aggregator( measurements = Benchmark_Run_Aggregator(
max_stddev=max_stddev, max_stddev=max_stddev,
# *0.9 to account for dropping the bottom & top 5 percent. # *0.9 to account for dropping the bottom & top 5 percent.
min_samples=MIN_NUM_SAMPLES * 0.9, min_samples=MIN_NUM_SAMPLES * 0.9,
) )
read_measurements_from_files(measurements, [measurements_file]) read_measurements_from_files(measurements, [measurements_input])
############################################################################## ##############################################################################
## build initial input equivalence classes ## build initial input equivalence classes
...@@ -407,6 +437,9 @@ def find_equivalence_classes(*, ...@@ -407,6 +437,9 @@ def find_equivalence_classes(*,
if inst.name in BLACKLISTED_INSTRUCTIONS: if inst.name in BLACKLISTED_INSTRUCTIONS:
return True return True
if inst.name == 'JMP_0':
return False
# forbid instructions with a read/write to a fixed register (forces data dependencies) # forbid instructions with a read/write to a fixed register (forces data dependencies)
for op in inst.operands: for op in inst.operands:
if not isinstance(op, ir.Register_Operand): if not isinstance(op, ir.Register_Operand):
...@@ -424,7 +457,7 @@ def find_equivalence_classes(*, ...@@ -424,7 +457,7 @@ def find_equivalence_classes(*,
EQ.make_measurements([(i,) for i in all_insts], EQ.make_measurements([(i,) for i in all_insts],
measurements, measurements,
output=measurement_output,) output=measurements_output,)
muI_grouped_by_prefix = collections.defaultdict(list) muI_grouped_by_prefix = collections.defaultdict(list)
...@@ -435,7 +468,7 @@ def find_equivalence_classes(*, ...@@ -435,7 +468,7 @@ def find_equivalence_classes(*,
run = measurements[i.name] run = measurements[i.name]
if run.num_muops != 1: if run.num_unfused_muops != 1:
continue continue
muI.append(i) muI.append(i)
...@@ -456,7 +489,7 @@ def find_equivalence_classes(*, ...@@ -456,7 +489,7 @@ def find_equivalence_classes(*,
measurements = measurements, measurements = measurements,
equivalence_classes = prefix_eq_classes, equivalence_classes = prefix_eq_classes,
eq_class_output = None, eq_class_output = None,
measurement_output = None, measurements_output = None,
yaml_log = yaml_log, yaml_log = yaml_log,
json_log = json_log, json_log = json_log,
) )
...@@ -470,7 +503,7 @@ def find_equivalence_classes(*, ...@@ -470,7 +503,7 @@ def find_equivalence_classes(*,
measurements = measurements, measurements = measurements,
equivalence_classes = equivalence_classes, equivalence_classes = equivalence_classes,
eq_class_output = eq_class_output, eq_class_output = eq_class_output,
measurement_output = measurement_output, measurements_output = measurements_output,
yaml_log = yaml_log, yaml_log = yaml_log,
json_log = json_log, json_log = json_log,
) )
...@@ -478,10 +511,10 @@ def find_equivalence_classes(*, ...@@ -478,10 +511,10 @@ def find_equivalence_classes(*,
def _find_equivalence_classes(*, def _find_equivalence_classes(*,
EQ: 'Equivalence_Class_Finder', EQ: 'Equivalence_Class_Finder',
measurements: Benchmark_Run_Summary_Aggregator, measurements: Benchmark_Run_Aggregator,
equivalence_classes: ty.List['Eq_Class'], equivalence_classes: ty.List['Eq_Class'],
eq_class_output: ty.Optional[pathlib.Path], eq_class_output: ty.Optional[pathlib.Path],
measurement_output: ty.Optional[pathlib.Path], measurements_output: ty.Optional[pathlib.Path],
yaml_log: ty.Optional[pathlib.Path], yaml_log: ty.Optional[pathlib.Path],
json_log: ty.Optional[pathlib.Path],) -> ty.List['Eq_Class']: json_log: ty.Optional[pathlib.Path],) -> ty.List['Eq_Class']:
...@@ -519,7 +552,7 @@ def _find_equivalence_classes(*, ...@@ -519,7 +552,7 @@ def _find_equivalence_classes(*,
############################################################################## ##############################################################################
## write output ## write output
if eq_class_output or measurement_output: if eq_class_output or measurements_output:
log('WRITE RESULTS') log('WRITE RESULTS')
if eq_class_output: if eq_class_output:
...@@ -534,6 +567,9 @@ def _find_equivalence_classes(*, ...@@ -534,6 +567,9 @@ def _find_equivalence_classes(*,
print(',', file=fd) print(',', file=fd)
first = False first = False
print(' ', '{', file=fd) print(' ', '{', file=fd)
print(' ', ' ', '"avg-ipc": ' + str(numpy.mean([measurements[i].ipc.mean for i in insts])) + ',', file=fd),
print(' ', ' ', '"avg-fused-mpc": ' + str(numpy.mean([measurements[i].fmpc.mean for i in insts])) + ',', file=fd),
print(' ', ' ', '"avg-unfused-mpc": ' + str(numpy.mean([measurements[i].umpc.mean for i in insts])) + ',', file=fd),
print(' ', ' ', '"insts": [' + ', '.join('"' + i + '"' for i in insts) + ']', file=fd) print(' ', ' ', '"insts": [' + ', '.join('"' + i + '"' for i in insts) + ']', file=fd)
print(' ', '}', end = '', file=fd) print(' ', '}', end = '', file=fd)
print(file=fd) print(file=fd)
...@@ -542,8 +578,8 @@ def _find_equivalence_classes(*, ...@@ -542,8 +578,8 @@ def _find_equivalence_classes(*,
except argparse.ArgumentTypeError as e: except argparse.ArgumentTypeError as e:
print(e, file=sys.stderr) print(e, file=sys.stderr)
if measurement_output is not None: if measurements_output is not None:
write_measurements(measurements, measurement_output) write_measurements(measurements, measurements_output)
return equivalence_classes return equivalence_classes
...@@ -595,6 +631,8 @@ def split_equivalence_classes(EQ, ...@@ -595,6 +631,8 @@ def split_equivalence_classes(EQ,
equivalence_classes: ty.List['Eq_Class']) -> ty.Tuple[ty.List['Eq_Class'], bool]: equivalence_classes: ty.List['Eq_Class']) -> ty.Tuple[ty.List['Eq_Class'], bool]:
assert type(equivalence_classes) is list assert type(equivalence_classes) is list
equivalence_classes = list(equivalence_classes)
changed: bool = False changed: bool = False
log(' SPLIT', len(equivalence_classes), 'CLASS(ES)') log(' SPLIT', len(equivalence_classes), 'CLASS(ES)')
...@@ -602,8 +640,8 @@ def split_equivalence_classes(EQ, ...@@ -602,8 +640,8 @@ def split_equivalence_classes(EQ,
for eq in list(equivalence_classes): for eq in list(equivalence_classes):
# log(' SPLIT', eq) # log(' SPLIT', eq)
# reps = EQ._select_representatives(eq) reps = EQ._select_representatives(eq)
reps = eq.random_sample(EQ.num_representatives) # reps = eq.random_sample(EQ.num_representatives)
EQ.make_measurements(((i,) for i in reps), measurements) EQ.make_measurements(((i,) for i in reps), measurements)
EQ.make_measurements(combination_kernels(reps, repeat=2), measurements, measure_ports=False) EQ.make_measurements(combination_kernels(reps, repeat=2), measurements, measure_ports=False)
...@@ -781,11 +819,14 @@ class Equivalence_Class_Finder: ...@@ -781,11 +819,14 @@ class Equivalence_Class_Finder:
n_random: int n_random: int
if self.random_representatives: if self.random_representatives:
n_alphabetic = 1 n_alphabetic = self.num_representatives // 4
n_random = max(0, self.num_representatives - 1) n_random = self.num_representatives - n_alphabetic
else: else:
n_alphabetic = max(0, self.num_representatives - 1) n_random = self.num_representatives // 4
n_random = 1 n_alphabetic = self.num_representatives - n_random
# n_random = 0
# n_alphabetic = self.num_representatives - n_random
assert n_alphabetic + n_random == self.num_representatives assert n_alphabetic + n_random == self.num_representatives
...@@ -798,14 +839,14 @@ class Equivalence_Class_Finder: ...@@ -798,14 +839,14 @@ class Equivalence_Class_Finder:
return reps return reps
@staticmethod @staticmethod
def eq_class_ipc_and_mpc(eq_clss: Eq_Class, measurements: Benchmark_Run_Summary_Aggregator): def eq_class_ipc_and_mpc(eq_clss: Eq_Class, measurements: Benchmark_Run_Aggregator):
insts = [(i.name,) for i in eq_clss] insts = [(i.name,) for i in eq_clss]
ipc = numpy.mean([measurements[i].ipc.mean for i in insts]) ipc = numpy.mean([measurements[i].ipc.mean for i in insts])
mpc = numpy.mean([measurements[i].mpc.mean for i in insts]) mpc = numpy.mean([measurements[i].umpc.mean for i in insts])
return ipc, mpc return ipc, mpc
@classmethod @classmethod
def log_eq_class(clss, eq_clss: Eq_Class, measurements: Benchmark_Run_Summary_Aggregator, indent: int = 2): def log_eq_class(clss, eq_clss: Eq_Class, measurements: Benchmark_Run_Aggregator, indent: int = 2):
insts = sorted(i.name for i in eq_clss) insts = sorted(i.name for i in eq_clss)
ipc, mpc = clss.eq_class_ipc_and_mpc(eq_clss, measurements) ipc, mpc = clss.eq_class_ipc_and_mpc(eq_clss, measurements)
log(' ' * indent, eq_clss, log(' ' * indent, eq_clss,
...@@ -815,7 +856,7 @@ class Equivalence_Class_Finder: ...@@ -815,7 +856,7 @@ class Equivalence_Class_Finder:
def make_measurements(self, def make_measurements(self,
kernels: ty.Iterable[ty.Tuple[ir.Instruction, ...]], kernels: ty.Iterable[ty.Tuple[ir.Instruction, ...]],
measurements: Benchmark_Run_Summary_Aggregator, measurements: Benchmark_Run_Aggregator,
*, *,
measure_ports: bool = None, measure_ports: bool = None,
force: bool = False, force: bool = False,
...@@ -846,7 +887,7 @@ class Equivalence_Class_Finder: ...@@ -846,7 +887,7 @@ class Equivalence_Class_Finder:
return False return False
extra_counters = [ extra_counters = [
# 'RESOURCE_STALLS', 'RESOURCE_STALLS',
] ]
if measure_ports: if measure_ports:
...@@ -885,29 +926,30 @@ class Equivalence_Class_Finder: ...@@ -885,29 +926,30 @@ class Equivalence_Class_Finder:
tmp_dir = str(pathlib.Path.cwd() / 'tmp'), tmp_dir = str(pathlib.Path.cwd() / 'tmp'),
debug = False,): debug = False,):
try: try:
run.drop_details()
if self.yaml_log is not None: if self.yaml_log is not None:
self.yaml_log.write('---\n') self.yaml_log.write('---\n')
yaml.dump(run, self.yaml_log) yaml.dump(run, self.yaml_log)
self.yaml_log.write('...\n') self.yaml_log.write('...\n')
if self.json_log is not None: if self.json_log is not None:
print(Benchmark_Run_Summary.from_benchmark_run(run).to_json(), file=self.json_log) print(run.to_json(), file=self.json_log)
name = run.benchmark.name name = run.benchmark.name
summary = Benchmark_Run_Summary.from_benchmark_run(run) err = test_equivalence(name, name, run, run)
err = test_equivalence(name, name, summary, summary)
if err: if err:
print(f'error: IPC({name}) != IPC({name})', ':', err, file=sys.stderr) print(f'error: IPC({name}) != IPC({name})', ':', err, file=sys.stderr)
exit(1) exit(1)
added = measurements.add_measurement(summary) added = measurements.add_measurement(run)
if NUM_SAMPLES >= MAX_NUM_SAMPLES or added: if NUM_SAMPLES >= MAX_NUM_SAMPLES or added:
key = tuple(run.benchmark.instructions) key = tuple(run.benchmark.instructions)
unknown.remove(key) unknown.remove(key)
if not added: if not added:
measurements.force_add(summary) measurements.force_add(run)
finally: finally:
if self.yaml_log is not None: if self.yaml_log is not None:
self.yaml_log.flush() self.yaml_log.flush()
...@@ -917,20 +959,17 @@ class Equivalence_Class_Finder: ...@@ -917,20 +959,17 @@ class Equivalence_Class_Finder:
NUM_SAMPLES += NUM_SAMPLES_STEP NUM_SAMPLES += NUM_SAMPLES_STEP
finally: finally:
if output: if output:
os.sched_yield()
write_measurements(measurements, output) write_measurements(measurements, output)
return True return True
def read_measurements_from_files(measurements: Benchmark_Run_Summary_Aggregator, def read_measurements_from_files(measurements: Benchmark_Run_Aggregator,
files: ty.Sequence[pathlib.Path]): files: ty.Sequence[pathlib.Path]):
for file in files: for file in files:
log('READING MEASUREMENTS FROM', shlex.quote(str(file))) log('READING MEASUREMENTS FROM', shlex.quote(str(file)))
with open(file) as fd: measurements.read_from_file(file)
for line in fd:
run = Benchmark_Run_Summary.from_json(line)
measurements.add_measurement(run)
log('FOUND', len(measurements), 'measurement(s)') log('FOUND', len(measurements), 'measurement(s)')
...@@ -938,7 +977,7 @@ def read_measurements_from_files(measurements: Benchmark_Run_Summary_Aggregator, ...@@ -938,7 +977,7 @@ def read_measurements_from_files(measurements: Benchmark_Run_Summary_Aggregator,
def read_equivalence_classes_from_file(instruction_set: ir.Instruction_Set, file: str): def read_equivalence_classes_from_file(instruction_set: ir.Instruction_Set, file: str):
try: try:
with argparse.FileType('r')(file) as fd: with argparse.FileType('r')(file) as fd:
json_eq_classes = json.load(fd) json_eq_classes = json.load(fd, allow_comments=True)
equivalence_classes = [] equivalence_classes = []
all_insts = [] all_insts = []
...@@ -964,7 +1003,7 @@ def read_equivalence_classes_from_file(instruction_set: ir.Instruction_Set, file ...@@ -964,7 +1003,7 @@ def read_equivalence_classes_from_file(instruction_set: ir.Instruction_Set, file
exit(1) exit(1)
def write_measurements(measurements: Benchmark_Run_Summary_Aggregator, file: pathlib.Path): def write_measurements(measurements: Benchmark_Run_Aggregator, file: pathlib.Path):
assert isinstance(file, pathlib.Path) assert isinstance(file, pathlib.Path)
log('WRITE MEASUREMENTS TO', shlex.quote(str(file))) log('WRITE MEASUREMENTS TO', shlex.quote(str(file)))
...@@ -972,21 +1011,17 @@ def write_measurements(measurements: Benchmark_Run_Summary_Aggregator, file: pat ...@@ -972,21 +1011,17 @@ def write_measurements(measurements: Benchmark_Run_Summary_Aggregator, file: pat
written = 0 written = 0
if str(file) in ['/dev/stdout', '/dev/stderr']: if str(file) in ['/dev/stdout', '/dev/stderr']:
with open(file, 'w') as fd: written += measurements.write_to_file(file, only_best=False)
for m in measurements.all_measurements():
print(m.to_json(), file=fd)
written += 1
else: else:
with tempfile.NamedTemporaryFile(mode='w', with tempfile.NamedTemporaryFile(mode='w',
prefix='eq-class-measurements.', prefix='eq-class-measurements.',
suffix='.jsonl', suffix=file.suffix,
delete=False,) as fd: delete=False,) as fd:
for m in measurements.all_measurements(): written += measurements.write_to_file(pathlib.Path(fd.name), only_best=False)
print(m.to_json(), file=fd) os.makedirs(file.parent, exist_ok=True)
written += 1 if file.exists():
os.makedirs(file.parent, exist_ok=True)
shutil.move(file, file.with_suffix(file.suffix + '.bkp')) shutil.move(file, file.with_suffix(file.suffix + '.bkp'))
shutil.move(fd.name, file) shutil.move(fd.name, file)
log('WROTE', written, 'measurement(s)') log('WROTE', written, 'measurement(s)')
...@@ -995,12 +1030,19 @@ def combination_kernels(*iterables: ty.Sequence[ir.Instruction], repeat: int = 1 ...@@ -995,12 +1030,19 @@ def combination_kernels(*iterables: ty.Sequence[ir.Instruction], repeat: int = 1
## AB is not necessarily the same as AABB (x86 is weird) ## AB is not necessarily the same as AABB (x86 is weird)
## so we use product instead of combinations ## so we use product instead of combinations
for combi in itertools.product(*iterables, repeat=repeat): for combi in itertools.product(*iterables, repeat=repeat):
for N in range(1, 6): for kernel in repetition_kernels(combi):
kernel = sum(((i,) * N for i in combi), ())
yield kernel yield kernel
def repetition_kernels(kernel: ty.Tuple[ir.Instruction, ...],
max_repeat: int = 5) -> ty.Iterable[ty.Tuple[ir.Instruction, ...]]:
assert max_repeat >= 1
for N in range(1, max_repeat + 1):
k = sum(((i,) * N for i in kernel), ())
yield k
class Percent: class Percent:
def __init__(self, numerator, denominator = None): def __init__(self, numerator, denominator = None):
if type(numerator) is Percent: if type(numerator) is Percent:
...@@ -1046,8 +1088,8 @@ Percent.FIVE = Percent(5, 100) ...@@ -1046,8 +1088,8 @@ Percent.FIVE = Percent(5, 100)
def test_equivalence(A: ir.Instruction, B: ir.Instruction, def test_equivalence(A: ir.Instruction, B: ir.Instruction,
runA: Benchmark_Run_Summary, runB: Benchmark_Run_Summary, runA: Benchmark_Run, runB: Benchmark_Run,
runAB: Benchmark_Run_Summary = None, *, margin: Percent = Percent.FIVE) -> ty.Optional[str]: runAB: Benchmark_Run = None, *, margin: Percent = Percent.FIVE) -> ty.Optional[str]:
""" """
Check if instruction A and B are equivalent. Check if instruction A and B are equivalent.
Returns an error message iff they are not equal, None otherwise. Returns an error message iff they are not equal, None otherwise.
...@@ -1069,17 +1111,25 @@ def test_equivalence(A: ir.Instruction, B: ir.Instruction, ...@@ -1069,17 +1111,25 @@ def test_equivalence(A: ir.Instruction, B: ir.Instruction,
dev = round(stat.stddev, 3) dev = round(stat.stddev, 3)
return f'{mean}±{dev}' return f'{mean}±{dev}'
assert runA.num_fused_muops == runB.num_fused_muops, f'num_fused_muops({A})={runA.num_fused_muops} ' \
f'num_fused_muops({B})={runB.num_fused_muops}'
assert runA.num_unfused_muops == runB.num_unfused_muops, f'num_unfused_muops({A})={runA.num_unfused_muops} ' \
f'num_unfused_muops({B})={runB.num_unfused_muops}'
assert IPCs_are_equivalent(runA, runB, margin), f'IPC({A})={pstat(runA.ipc)} IPC({B})={pstat(runB.ipc)}' assert IPCs_are_equivalent(runA, runB, margin), f'IPC({A})={pstat(runA.ipc)} IPC({B})={pstat(runB.ipc)}'
if runAB: if runAB:
assert IPCs_are_equivalent(runA, runAB, margin), f'IPC({A})={pstat(runA.ipc)} IPC({A} {B})={pstat(runAB.ipc)}' assert IPCs_are_equivalent(runA, runAB, margin), f'IPC({A})={pstat(runA.ipc)} IPC({A} {B})={pstat(runAB.ipc)}'
assert IPCs_are_equivalent(runB, runAB, margin), f'IPC({B})={pstat(runB.ipc)} IPC({A} {B})={pstat(runAB.ipc)}' assert IPCs_are_equivalent(runB, runAB, margin), f'IPC({B})={pstat(runB.ipc)} IPC({A} {B})={pstat(runAB.ipc)}'
assert MPCs_are_equivalent(runA, runB, margin), f'MPC({A})={pstat(runA.mpc)} MPC({B})={pstat(runB.mpc)}' assert MPCs_are_equivalent(runA, runB, margin), f'MPC({A})={pstat(runA.umpc)} MPC({B})={pstat(runB.umpc)}'
if runAB: if runAB:
assert MPCs_are_equivalent(runA, runAB, margin), f'MPC({A})={pstat(runA.mpc)} MPC({A} {B})={pstat(runAB.mpc)}' assert MPCs_are_equivalent(runA, runAB, margin), f'MPC({A})={pstat(runA.umpc)} MPC({A} {B})={pstat(runAB.umpc)}'
assert MPCs_are_equivalent(runB, runAB, margin), f'MPC({B})={pstat(runB.mpc)} MPC({A} {B})={pstat(runAB.mpc)}' assert MPCs_are_equivalent(runB, runAB, margin), f'MPC({B})={pstat(runB.umpc)} MPC({A} {B})={pstat(runAB.umpc)}'
if runA.port_muops and runB.port_muops: USE_PORTS = False
if USE_PORTS and runA.port_muops and runB.port_muops:
def ports_used(run) -> ty.Set[int]: def ports_used(run) -> ty.Set[int]:
"return set of ports used by a benchmark run (not by how much each port is used)" "return set of ports used by a benchmark run (not by how much each port is used)"
...@@ -1089,7 +1139,7 @@ def test_equivalence(A: ir.Instruction, B: ir.Instruction, ...@@ -1089,7 +1139,7 @@ def test_equivalence(A: ir.Instruction, B: ir.Instruction,
## a port that receives less than 5% of all muops is considered unused ## a port that receives less than 5% of all muops is considered unused
## (i.e. that usage is some measurement noise, comes from another hyperthread, ...) ## (i.e. that usage is some measurement noise, comes from another hyperthread, ...)
## TODO: investigate example ADC_GPR64i64_IMMi32 ## TODO: investigate example ADC_GPR64i64_IMMi32
if (stat.mean / run.muops.mean) >= 0.05: if (stat.mean / run.unfused_muops.mean) >= 0.05:
ports.add(port) ports.add(port)
return ports return ports
...@@ -1125,6 +1175,12 @@ def round_float(value: float) -> fractions.Fraction: ...@@ -1125,6 +1175,12 @@ def round_float(value: float) -> fractions.Fraction:
return rounded return rounded
def powerset(iterable: ty.Iterable['T']) -> ty.Iterable[ty.Tuple['T', ...]]:
"powerset([1,2,3]) --> () (1,) (2,) (3,) (1,2) (1,3) (2,3) (1,2,3)"
s = list(iterable)
return itertools.chain.from_iterable(itertools.combinations(s, r) for r in range(len(s) + 1))
def ttest_1samp_from_stats(mean, std, var, nobs, popmean): def ttest_1samp_from_stats(mean, std, var, nobs, popmean):
""" """
Calculate the T-test for the mean of ONE group of scores from descriptive statistics. Calculate the T-test for the mean of ONE group of scores from descriptive statistics.
...@@ -1191,29 +1247,30 @@ def tost_paired(y, x, low, upp, transform=None): ...@@ -1191,29 +1247,30 @@ def tost_paired(y, x, low, upp, transform=None):
P_VALUE = 0.05 P_VALUE = 0.05
def IPCs_are_equivalent(a: Benchmark_Run_Summary, b: Benchmark_Run_Summary, margin: Percent = Percent.FIVE): def IPCs_are_equivalent(a: Benchmark_Run, b: Benchmark_Run, margin: Percent = Percent.FIVE):
assert 0 <= margin <= 1 assert 0 <= margin <= 1
if round(a.ipc.p75, 2) == round(b.ipc.p75, 2): # if round(a.ipc.p75, 2) == round(b.ipc.p75, 2):
return True # return True
margin = min(a.ipc.mean, b.ipc.mean) * float(margin) margin = min(a.ipc.mean, b.ipc.mean) * float(margin)
return means_are_equivalent(a.ipc, b.ipc, margin) return means_are_equivalent(a.ipc, b.ipc, margin)
def MPCs_are_equivalent(a: Benchmark_Run_Summary, b: Benchmark_Run_Summary, margin: Percent = Percent.FIVE): def MPCs_are_equivalent(a: Benchmark_Run, b: Benchmark_Run, margin: Percent = Percent.FIVE):
assert 0 <= margin <= 1 assert 0 <= margin <= 1
if round(a.mpc.p75, 2) == round(b.mpc.p75, 2): # if round(a.umpc.p75, 2) == round(b.umpc.p75, 2) and round(a.fmpc.p75, 2) == round(b.fmpc.p75, 2):
return True # return True
margin = min(a.mpc.mean, b.mpc.mean) * float(margin) u_margin = min(a.umpc.mean, b.umpc.mean) * float(margin)
f_margin = min(a.fmpc.mean, b.fmpc.mean) * float(margin)
return means_are_equivalent(a.mpc, b.mpc, margin) return means_are_equivalent(a.umpc, b.umpc, u_margin) and means_are_equivalent(a.fmpc, b.fmpc, f_margin)
def port_muops_are_equivalent(a: Benchmark_Run_Summary, b: Benchmark_Run_Summary, port: int): def port_muops_are_equivalent(a: Benchmark_Run, b: Benchmark_Run, port: int):
## Muop per port counters aren't that precise and port usage fluctuates way more than ## Muop per port counters aren't that precise and port usage fluctuates way more than
## cycles or total number of muops, so we are only looking for a very broad sense of equality here. ## cycles or total number of muops, so we are only looking for a very broad sense of equality here.
## The average benchmark runs 5_000_000 muops, so this works out to a margin of 200_000 muops. ## The average benchmark runs 5_000_000 muops, so this works out to a margin of 200_000 muops.
......
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment