Commit f249c4e9 authored by Philippe SWARTVAGHER

model: move combining models in a dedicated class

parent dfa67aff
import copy
import statistics
from .comm_comp import FilesParser, CompMetric
from .hwloc import HwlocTopology
class CommCompModel:
@@ -53,6 +55,9 @@ class CommCompModel:
if comp_bw_alone[i] > self.max_comp_alone_value:
self.max_comp_alone_value = comp_bw_alone[i]
self.max_comp_alone_i = i
# Assume max_together occurs with the same or a smaller number of computing cores running alone:
# (If that is not the case, we are on the plateau of the total
# curve and it is only a minor variation. See occigen-bw/comp_1_comm1.)
for i in range(self.max_comp_alone_i+1):
if total_parallel[i] > self.max_together_value:
self.max_together_value = total_parallel[i]
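A minimal sketch of the two scans above, with made-up numbers: first locate the peak of the computation-alone curve, then search for the combined peak only among indices up to that point, so a later plateau of the total curve is ignored.

comp_bw_alone = [10.0, 19.0, 26.0, 25.0]    # hypothetical GB/s per core count
total_parallel = [14.0, 22.0, 27.0, 28.0]   # hypothetical comm+comp totals
max_comp_alone_i = max(range(len(comp_bw_alone)), key=comp_bw_alone.__getitem__)
max_together_i = max(range(max_comp_alone_i + 1), key=total_parallel.__getitem__)
# max_comp_alone_i == 2, so the higher total at index 3 (a plateau effect)
# is deliberately ignored and max_together_i == 2.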
@@ -123,8 +128,6 @@ class CommCompModel:
last_comm_ratio_left_value = None
last_comm_ratio_left_i = None
diff = max(0, self.max_together_value - self.max_comp_alone_value)
for i in range(len(self.x_model)):
# The computation-alone performance cannot exceed the perfect scaling, the measured total, or the maximum observed computation-alone performance:
self.comp_alone_model.append(min(self.comp_mem_req*self.x_model[i], self.total_model[i], self.max_comp_alone_value))
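A hypothetical illustration of this three-way bound (comp_mem_req and all series values are made up):

comp_mem_req = 10.0                # GB/s consumed by one computing core
total_model = [12.0, 24.0, 30.0]   # modeled total bandwidth for 1..3 cores
max_comp_alone_value = 28.0        # best computation-alone bandwidth observed
comp_alone_model = [min(comp_mem_req * n, total_model[n - 1], max_comp_alone_value)
                    for n in (1, 2, 3)]
# -> [10.0, 20.0, 28.0]: perfect scaling caps the first two points,
# the observed maximum caps the third.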
@@ -140,6 +143,8 @@ class CommCompModel:
assert(last_comm_ratio_left_value <= 1)
last_comm_ratio_left_i = i
# This held for experiments on several machines.
# Keep the assertion and see what happens if it ever becomes false.
assert(self.x_model[i] <= self.xs[self.max_together_i])
else:
# The communications can be affected, so apply the ratio:
@@ -160,3 +165,47 @@ class CommCompModel:
# The remainder of the bandwidth is left for computations:
self.comp_with_comm_model.append(self.total_model[i]-self.comm_with_comp_model[i])
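The split follows directly from this subtraction; a small made-up example:

total_model = [20.0, 30.0, 35.0]         # hypothetical modeled totals
comm_with_comp_model = [8.0, 6.0, 5.0]   # hypothetical communication share
comp_with_comm_model = [t - c for t, c in zip(total_model, comm_with_comp_model)]
# -> [12.0, 24.0, 30.0]: whatever communications do not take is left for computations.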
class CombinedCommCompModel:
def __init__(self, topology: HwlocTopology, model_local: CommCompModel, model_remote: CommCompModel=None):
self.topology = topology
self.model_local = model_local
self.model_remote = model_remote
def comm_with_comp(self, numa_comm, numa_comp):
if numa_comp == numa_comm and numa_comp >= self.topology.nb_numa_nodes_per_socket:
# Remote accesses, same memory controller:
return self.model_remote.comm_with_comp_model
else:
comm_model = copy.deepcopy(self.model_local)
if numa_comm >= self.topology.nb_numa_nodes_per_socket:
# On some machines (e.g. AMD with IB 200GB/s) the network
# performance is very sensitive to placement, so use the
# nominal remote perf for all configurations where the comm
# memory is bound remotely.
comm_model.comm_alone_bw = self.model_remote.comm_alone_bw
comm_model.predict()
return comm_model.comm_with_comp_model
def comp_with_comm(self, numa_comm, numa_comp):
if numa_comp < self.topology.nb_numa_nodes_per_socket:
# Computations do local accesses:
if numa_comm == numa_comp:
return self.model_local.comp_with_comm_model
else:
return self.model_local.comp_alone_model
else:
# Computations do remote accesses:
if numa_comm == numa_comp:
return self.model_remote.comp_with_comm_model
else:
return self.model_remote.comp_alone_model
def total(self, numa_comp):
if numa_comp < self.topology.nb_numa_nodes_per_socket:
# Computations do local accesses:
return self.model_local.total_model
else:
# Computations do remote accesses:
return self.model_remote.total_model
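A hypothetical usage of this class, assuming a machine with one NUMA node per socket (so nb_numa_nodes_per_socket == 1 and NUMA node 1 is remote) and two already-fitted models; only the dispatch logic is illustrated:

combined = CombinedCommCompModel(topo, model_local, model_remote)
# Remote communication memory, local computation memory:
comm_curve = combined.comm_with_comp(numa_comm=1, numa_comp=0)
# -> local model re-predicted with the remote comm_alone_bw
comp_curve = combined.comp_with_comm(numa_comm=1, numa_comp=0)
# -> model_local.comp_alone_model (different NUMA nodes: computations modeled as alone)
total_curve = combined.total(numa_comp=0)
# -> model_local.total_model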
#!/usr/bin/env python3
import argparse
import copy
import glob
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
@@ -60,12 +59,16 @@ if cli_args.model:
model_local = get_model(cli_args.comp_kernel, 0, nb_cores_to_consider)
model_local.print_params()
model_remote = None
if topo.nb_numa_nodes_total > 1:
print("** Modeling remote accesses...")
try:
model_remote = get_model(cli_args.comp_kernel, topo.nb_numa_nodes_per_socket, nb_cores_to_consider)
model_remote.print_params()
except Exception as e:
print(f"Can't model remote accesses: {e}")
combined_model = CombinedCommCompModel(topo, model_local, model_remote)
else:
print("** Skipping model building")
@@ -146,21 +149,8 @@ for numa_comp in range(topo.nb_numa_nodes_total):
comm_with_comp_upper_values
)
# Decision of which model to apply for communications:
if cli_args.model:
comm_values_model = combined_model.comm_with_comp(numa_comm, numa_comp)
graph.add_comm_curve(
comm_values_model,
@@ -201,21 +191,8 @@ for numa_comp in range(topo.nb_numa_nodes_total):
)
if cli_args.model:
# Decision of which model to apply for computations:
comp_values_model = combined_model.comp_with_comm(numa_comm, numa_comp)
total_model = combined_model.total(numa_comp)
graph.add_comp_curve(
comp_values_model,