Commit 5cc9008e authored by Millian Poquet's avatar Millian Poquet
Browse files

test: usage trace profile

parent 1ed61a94
......@@ -124,6 +124,9 @@ let
"^workloads/smpi"
"^workloads/smpi/.*"
"^workloads/smpi/.*/.*\.txt"
"^workloads/usage-trace"
"^workloads/usage-trace/.*"
"^workloads/usage-trace/.*/.*\.txt"
"^events"
"^events/.*\.txt"
];
......
<?xml version='1.0'?>
<!DOCTYPE platform SYSTEM "http://simgrid.gforge.inria.fr/simgrid/simgrid.dtd">
<platform version="4.1">
<zone id="AS0" routing="Full">
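<!-- Hosts: one master host, two faster hosts (a, b) and two slower hosts (c, d), each compute host with its power properties. -->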
<host id="master_host" speed="100.0Mf"/>
<host id="a" core="100" pstate="0" speed="100.0f">
<prop id="wattage_per_state" value="50.0:100:200.0"/>
<prop id="wattage_off" value="5.0"/>
</host>
<host id="b" core="100" pstate="0" speed="100.0f">
<prop id="wattage_per_state" value="50.0:100:200.0"/>
<prop id="wattage_off" value="5.0"/>
</host>
<host id="c" core="100" pstate="0" speed="50.0f">
<prop id="wattage_per_state" value="50.0:90.0:150.0"/>
<prop id="wattage_off" value="5.0"/>
</host>
<host id="d" core="100" pstate="0" speed="50.0f">
<prop id="wattage_per_state" value="50.0:90.0:150.0"/>
<prop id="wattage_off" value="5.0"/>
</host>
<link id="6" bandwidth="41.279125MBps" latency="59.904us"/>
<link id="3" bandwidth="34.285625MBps" latency="514.433us"/>
<link id="7" bandwidth="11.618875MBps" latency="189.98us"/>
<link id="9" bandwidth="7.20975MBps" latency="1.461517ms"/>
<link id="2" bandwidth="118.6825MBps" latency="136.931us"/>
<link id="8" bandwidth="8.158MBps" latency="270.544us"/>
<link id="1" bandwidth="34.285625MBps" latency="514.433us"/>
<link id="4" bandwidth="10.099625MBps" latency="479.78us"/>
<link id="0" bandwidth="41.279125MBps" latency="59.904us"/>
<link id="5" bandwidth="27.94625MBps" latency="278.066us"/>
<link id="loopback" bandwidth="498MBps" latency="15us" sharing_policy="FATPIPE"/>
<route src="master_host" dst="master_host"><link_ctn id="loopback"/></route>
<route src="a" dst="a"><link_ctn id="loopback"/></route>
<route src="b" dst="b"><link_ctn id="loopback"/></route>
<route src="c" dst="c"><link_ctn id="loopback"/></route>
<route src="d" dst="d"><link_ctn id="loopback"/></route>
<route src="master_host" dst="a">
<link_ctn id="9"/>
</route>
<route src="master_host" dst="b">
<link_ctn id="4"/><link_ctn id="3"/><link_ctn id="2"/><link_ctn id="0"/><link_ctn id="1"/><link_ctn id="8"/>
</route>
<route src="master_host" dst="c">
<link_ctn id="4"/><link_ctn id="3"/><link_ctn id="5"/>
</route>
<route src="master_host" dst="d">
<link_ctn id="4"/><link_ctn id="3"/><link_ctn id="2"/><link_ctn id="0"/><link_ctn id="1"/><link_ctn id="6"/><link_ctn id="7"/>
</route>
<route src="a" dst="b">
<link_ctn id="9"/><link_ctn id="4"/><link_ctn id="3"/><link_ctn id="2"/><link_ctn id="0"/><link_ctn id="1"/><link_ctn id="8"/>
</route>
<route src="a" dst="c">
<link_ctn id="9"/><link_ctn id="4"/><link_ctn id="3"/><link_ctn id="5"/>
</route>
<route src="a" dst="d">
<link_ctn id="9"/><link_ctn id="4"/><link_ctn id="3"/><link_ctn id="2"/><link_ctn id="0"/><link_ctn id="1"/><link_ctn id="6"/><link_ctn id="7"/>
</route>
<route src="b" dst="c">
<link_ctn id="8"/><link_ctn id="1"/><link_ctn id="0"/><link_ctn id="2"/><link_ctn id="5"/>
</route>
<route src="b" dst="d">
<link_ctn id="8"/><link_ctn id="6"/><link_ctn id="7"/>
</route>
<route src="c" dst="d">
<link_ctn id="5"/><link_ctn id="2"/><link_ctn id="0"/><link_ctn id="1"/><link_ctn id="6"/><link_ctn id="7"/>
</route>
</zone>
</platform>
......@@ -57,6 +57,7 @@ def pytest_generate_tests(metafunc):
# Platforms
platforms_def = {
"small": "small_platform.xml",
"smallusage": "small_platform_usage_replay.xml",
"cluster512": "cluster512.xml",
"cluster512_pfs": "cluster512_pfs.xml",
"energy128notopo": "energy_platform_homogeneous_no_net_128.xml",
......@@ -87,6 +88,7 @@ def pytest_generate_tests(metafunc):
"smpicollectives": "test_smpi_collectives.json",
"tuto1": "test_case_study1.json",
"tutostencil": "test_tuto_stencil.json",
"usagetrace": "test_usage_trace.json",
"walltime": "test_walltime.json",
"walltimesmpi": "test_walltime_smpi.json",
}
......@@ -97,6 +99,7 @@ def pytest_generate_tests(metafunc):
moldable_perf_degradation_workloads = ['compute1', 'computetot1']
energymini_workloads = ['energymini0', 'energymini50', 'energymini100']
tuto_stencil_workloads = ['tutostencil']
usage_trace_workloads = ['usagetrace']
workflows = ['genome']
# Algorithms
......@@ -143,6 +146,8 @@ def pytest_generate_tests(metafunc):
metafunc.parametrize('cluster_pfs_platform', generate_platforms(platform_dir, platforms_def, ['cluster512_pfs']))
if 'properties_platform' in metafunc.fixturenames:
metafunc.parametrize('properties_platform', generate_platforms(platform_dir, platforms_def, ['properties_platform']))
if 'usage_trace_platform' in metafunc.fixturenames:
metafunc.parametrize('usage_trace_platform', generate_platforms(platform_dir, platforms_def, ['smallusage']))
# Workloads
if 'workload' in metafunc.fixturenames:
......@@ -163,6 +168,8 @@ def pytest_generate_tests(metafunc):
metafunc.parametrize('moldable_perf_degradation_workload', generate_workloads(workload_dir, workloads_def, moldable_perf_degradation_workloads))
if 'energymini_workload' in metafunc.fixturenames:
metafunc.parametrize('energymini_workload', generate_workloads(workload_dir, workloads_def, energymini_workloads))
if 'usage_trace_workload' in metafunc.fixturenames:
metafunc.parametrize('usage_trace_workload', generate_workloads(workload_dir, workloads_def, usage_trace_workloads))
if 'samesubmittime_workload' in metafunc.fixturenames:
metafunc.parametrize('samesubmittime_workload', generate_workloads(workload_dir, workloads_def, ['samesubmittime']))
if 'walltime_workload' in metafunc.fixturenames:
......
#!/usr/bin/env python3
'''Usage trace tests.
These tests check that the energy consumption of usage trace profiles is the expected one.
'''
import pandas as pd
import pytest
from helper import *
# Parameters of the two machine types used to predict job durations and energy.
# speed divides a flops amount to obtain a duration; widle is the power charged
# while a machine is idle; wmin/wmax are the power bounds interpolated by usage.
# NOTE(review): these presumably mirror the host `speed` and `wattage_per_state`
# values of the platform XML used by this test — confirm and keep in sync.

# "fast" machine type
fast_speed = 100
fast_widle = 50
fast_wmin = 100
fast_wmax = 200
# "slow" machine type
slow_speed = 50
slow_widle = 50
slow_wmin = 90
slow_wmax = 150
def joule_prediction(time, wattmin, wattmax, usage):
    '''Predict the energy consumed by one machine during `time`.

    The power is interpolated linearly between `wattmin` (usage 0) and
    `wattmax` (usage 1), then multiplied by `time`.
    '''
    watt_range = wattmax - wattmin
    power = wattmin + watt_range * usage
    return time * power
def check_ok_bool(row):
    '''Return True when a job row matches its expectations, False otherwise.

    Execution time and consumed energy are each compared with their
    `expected_*` counterpart after truncation to int. The energy is only
    looked at when the execution times already match.
    '''
    return (
        int(row['execution_time']) == int(row['expected_execution_time'])
        and int(row['consumed_energy']) == int(row['expected_consumed_energy'])
    )
def check_ok(row):
    '''Integer flavour of check_ok_bool: 1 when the row is valid, 0 otherwise.

    Returning an int lets the per-row results be summed.
    '''
    return 1 if check_ok_bool(row) else 0
def estimate_job_from_real_trace():
    '''Compute the expected result row of job '60' from its usage trace.

    Each trace phase lasts flops/speed on its machine and draws a power
    interpolated between wmin and wmax by the phase usage. The job lasts as
    long as its busiest machine; every machine is charged widle for the time
    between the end of its own phases and the end of the job.

    Returns a list [job_id, expected_execution_time, expected_consumed_energy].
    '''
    # One (machine_id, usage, flops) tuple per phase of the trace.
    phases = [
        (0, 0.86, 1186),
        (0, 0.64, 469),
        (0, 0.79, 456),
        (0, 0.84, 4643),
        (0, 0.85, 4659),
        (0, 0.9, 1000),
        (0, 0.83, 3614),
        (0, 0.84, 4643),
        (0, 0.9, 933),
        (0, 0.83, 3759),
        (0, 0.89, 1011),
        (0, 0.83, 3614),
        (0, 0.84, 4571),
        (0, 0.91, 923),
        (0, 0.84, 3643),
        (0, 0.89, 1079),
        (0, 0.83, 3614),
        (0, 0.85, 4588),
        (0, 0.85, 4588),
        (0, 0.91, 989),
        (0, 0.84, 3643),
        (0, 0.85, 4659),
        (0, 0.84, 4643),
        (0, 0.88, 3068),
        (0, 0.79, 1519),
        (0, 0.89, 674),
        (1, 0.98, 1041),
        (1, 0.73, 411),
        (1, 0.88, 409),
        (1, 0.98, 2755),
        (1, 0.88, 1364),
        (1, 1.0, 1020),
        (1, 0.94, 3383),
        (1, 0.99, 2424),
        (1, 0.88, 1432),
        (1, 0.98, 2694),
        (1, 0.89, 1416),
        (1, 1, 840),
        (1, 0.97, 2041),
        (1, 0.89, 1281),
        (1, 0.99, 2667),
        (1, 0.89, 1416),
        (1, 0.99, 2667),
        (1, 0.89, 1348),
        (1, 1, 840),
        (1, 0.98, 1898),
        (1, 0.89, 1348),
        (1, 0.98, 2816),
        (1, 0.88, 1364),
        (1, 0.97, 18990),
        (1, 0.89, 1348),
        (1, 0.99, 2727),
        (1, 0.89, 1348),
        (1, 1.0, 600),
        (2, 0.84, 1429),
        (2, 0.64, 469),
        (2, 0.86, 3837),
        (2, 0.75, 960),
        (2, 0.9, 667),
        (2, 0.86, 3070),
        (2, 0.76, 947),
        (2, 0.86, 3698),
        (2, 0.76, 947),
        (2, 0.86, 3767),
        (2, 0.76, 868),
        (2, 0.86, 3837),
        (2, 0.75, 880),
        (2, 0.86, 3698),
        (2, 0.75, 960),
        (2, 0.86, 3628),
        (2, 0.75, 960),
        (2, 0.86, 3698),
        (2, 0.75, 960),
        (2, 0.86, 3767),
        (2, 0.75, 960),
        (2, 0.85, 3741),
        (2, 0.75, 960),
        (2, 0.86, 3698),
        (2, 0.76, 947),
        (2, 0.87, 3724),
        (2, 0.75, 960),
        (2, 0.87, 3655),
        (2, 0.75, 960),
        (2, 0.91, 593),
        (2, 0.85, 3176),
        (2, 0.75, 960),
        (2, 0.85, 3741),
        (2, 0.76, 947),
        (2, 0.88, 682),
        (3, 0.87, 1379),
        (3, 0.66, 455),
        (3, 0.89, 3708),
        (3, 0.77, 935),
        (3, 0.9, 3600),
        (3, 0.78, 923),
        (3, 0.89, 3573),
        (3, 0.78, 923),
        (3, 0.88, 3750),
        (3, 0.77, 779),
        (3, 0.89, 3708),
        (3, 0.78, 846),
        (3, 0.89, 3573),
        (3, 0.77, 935),
        (3, 0.89, 3506),
        (3, 0.77, 935),
        (3, 0.9, 3533),
        (3, 0.77, 935),
        (3, 0.89, 3640),
        (3, 0.77, 935),
        (3, 0.89, 3573),
        (3, 0.77, 935),
        (3, 0.91, 3495),
        (3, 0.78, 923),
        (3, 0.9, 3600),
        (3, 0.78, 923),
        (3, 0.9, 3533),
        (3, 0.78, 923),
        (3, 0.9, 3600),
        (3, 0.78, 923),
        (3, 0.89, 3573),
        (3, 0.77, 935),
        (3, 0.91, 659),
    ]
    phases_df = pd.DataFrame(phases, columns=['machine_id', 'usage', 'flops'])

    # Job allocation (rank -> machine_type).
    allocation_df = pd.DataFrame(
        [(0, 'fast'), (1, 'fast'), (2, 'slow'), (3, 'slow')],
        columns=['machine_id', 'machine_type'],
    )

    # Parameters of each type of machine.
    type_params_df = pd.DataFrame(
        [
            ('fast', fast_speed, fast_widle, fast_wmin, fast_wmax),
            ('slow', slow_speed, slow_widle, slow_wmin, slow_wmax),
        ],
        columns=['machine_type', 'speed', 'widle', 'wmin', 'wmax'],
    )

    # Per-machine parameters, reused for the phase table and the idle table.
    machine_params_df = pd.merge(allocation_df, type_params_df)

    # Duration, power and energy of every phase.
    per_phase = pd.merge(phases_df, machine_params_df)
    per_phase['duration'] = per_phase['flops'] / per_phase['speed']
    power_range = per_phase['wmax'] - per_phase['wmin']
    per_phase['w'] = per_phase['wmin'] + power_range * per_phase['usage']
    per_phase['joules'] = per_phase['w'] * per_phase['duration']

    # Busy time of each machine; the longest one defines the job duration.
    per_machine = per_phase.groupby(['machine_id'])['duration'].agg('sum').to_frame()
    per_machine = per_machine.reset_index(level=0)
    job_execution_time = per_machine['duration'].max()
    per_machine['job_duration'] = job_execution_time
    per_machine['idle_time'] = per_machine['job_duration'] - per_machine['duration']

    # Energy spent idling while waiting for the busiest machine to finish.
    per_machine = pd.merge(per_machine, machine_params_df)
    per_machine['joules'] = per_machine['widle'] * per_machine['idle_time']

    job_joules = per_machine['joules'].sum() + per_phase['joules'].sum()
    return ['60', job_execution_time, job_joules]
def usage_trace(platform, workload, algorithm):
    '''Run one simulation instance and check every job's time and energy.

    The instance is described with RobinInstance, written to a robin file and
    executed with run_robin. The resulting batres_jobs.csv is then compared,
    job by job, against the expectations computed in this function.

    Raises an Exception when the scheduler implementation is not batsched,
    when robin returns a non-zero code, when the workload contains a job
    unknown to this test, or when any job does not match its expectation.
    '''
    test_name = f'usagetrace-{algorithm.name}-{platform.name}-{workload.name}'
    output_dir, robin_filename, _ = init_instance(test_name)

    if algorithm.sched_implem != 'batsched':
        raise Exception('This test only supports batsched for now')

    instance = RobinInstance(
        output_dir=output_dir,
        batcmd=gen_batsim_cmd(platform.filename, workload.filename, output_dir, "--energy"),
        schedcmd=f"batsched -v '{algorithm.sched_algo_name}'",
        simulation_timeout=30, ready_timeout=5,
        success_timeout=10, failure_timeout=0,
    )
    instance.to_file(robin_filename)

    robin_result = run_robin(robin_filename)
    if robin_result.returncode != 0:
        raise Exception(f'Bad robin return code ({robin_result.returncode})')

    # Analyze Batsim results to check their energy consumption is the expected one.
    jobs = pd.read_csv(f'{output_dir}/batres_jobs.csv')
    jobs['job_id'] = jobs['job_id'].astype('string')
    jobs = jobs.sort_values(by=['job_id'])

    # Shorthands for the energy of one machine of each kind.
    def fast(duration, usage):
        return joule_prediction(duration, fast_wmin, fast_wmax, usage)

    def slow(duration, usage):
        return joule_prediction(duration, slow_wmin, slow_wmax, usage)

    def idle(duration, widle):
        return joule_prediction(duration, widle, widle, 0.0)

    # One [job_id, expected_execution_time, expected_consumed_energy] per job.
    expected = [
        ['0', 10, 2*idle(10, fast_widle)],
        ['10', 10, 2*fast(10, 1.0)],
        ['11', 20, 2*slow(20, 1.0)],
        ['20', 10, 2*fast(10, 0.5)],
        ['21', 20, 2*slow(20, 0.5)],
        ['30', 20, 2*(fast(10, 1.0) + fast(10, 0.1))],
        ['31', 40, 2*(slow(20, 1.0) + slow(20, 0.1))],
        ['40', 10, fast(10, 0.2) + fast(10, 0.6)],
        ['41', 20, slow(20, 0.2) + slow(20, 0.6)],
        ['50', 20, fast(10, 0.1) + idle(10, fast_widle) + fast(10, 0.01) + fast(10, 0.97)],
        ['51', 40, slow(20, 0.1) + idle(20, slow_widle) + slow(20, 0.01) + slow(20, 0.97)],
        estimate_job_from_real_trace(),
    ]
    expected_df = pd.DataFrame(expected, columns=['job_id', 'expected_execution_time', 'expected_consumed_energy'])

    # The merge drops jobs without an expectation, so a shorter result means
    # the workload contains jobs this test does not know about.
    merged = pd.merge(jobs, expected_df)
    if len(merged) != len(jobs):
        raise Exception('There are {} jobs in the workload but only {} jobs are known by the test'.format(len(jobs), len(merged)))

    merged['valid'] = merged.apply(check_ok, axis=1)
    all_valid = merged['valid'].sum() == len(merged)

    # The report is printed in both cases; only a mismatch fails the test.
    print('All jobs are valid!' if all_valid else 'Some jobs are invalid!')
    print(merged[['job_id', 'valid', 'execution_time', 'expected_execution_time', 'consumed_energy', 'expected_consumed_energy']])
    if not all_valid:
        raise Exception('The execution of some jobs did not match this test expectations.')
def test_usage_trace(usage_trace_platform, usage_trace_workload, fcfs_algorithm):
    '''Pytest entry point: run the usage trace check on the given fixtures.'''
    usage_trace(
        platform=usage_trace_platform,
        workload=usage_trace_workload,
        algorithm=fcfs_algorithm,
    )
{
"nb_res": 4,
"jobs": [
{"id": 0, "subtime": 0, "walltime": 100, "res": 2, "profile": "delay-10s"},
{"id": 10, "subtime": 100, "walltime": 100, "res": 2, "profile": "ut-homo-onephase-100"},
{"id": 11, "subtime": 101, "walltime": 100, "res": 2, "profile": "ut-homo-onephase-100"},
{"id": 20, "subtime": 200, "walltime": 100, "res": 2, "profile": "ut-homo-onephase-50"},
{"id": 21, "subtime": 201, "walltime": 100, "res": 2, "profile": "ut-homo-onephase-50"},
{"id": 30, "subtime": 300, "walltime": 100, "res": 2, "profile": "ut-homo-twophases-100-10"},
{"id": 31, "subtime": 301, "walltime": 100, "res": 2, "profile": "ut-homo-twophases-100-10"},
{"id": 40, "subtime": 400, "walltime": 100, "res": 2, "profile": "ut-hetero-onephase-20-60"},
{"id": 41, "subtime": 401, "walltime": 100, "res": 2, "profile": "ut-hetero-onephase-20-60"},
{"id": 50, "subtime": 500, "walltime": 100, "res": 2, "profile": "ut-hetero-onephase-10-twophases-1-97"},
{"id": 51, "subtime": 501, "walltime": 100, "res": 2, "profile": "ut-hetero-onephase-10-twophases-1-97"},
{"id": 60, "subtime": 1000, "walltime": 2000, "res": 4, "profile": "ut-from-real-trace"}
],
"profiles": {
"delay-10s": {
"type": "delay",
"delay": 10
},
"ut-hetero-onephase-20-60": {
"type": "usage_trace",
"trace": "usage-trace/hetero-onephase-20-60/traces.txt"
},
"ut-hetero-onephase-10-twophases-1-97": {
"type": "usage_trace",
"trace": "usage-trace/hetero-onephase-10-twophases-1-97/traces.txt"
},
"ut-homo-onephase-100": {
"type": "usage_trace",
"trace": "usage-trace/homo-onephase-100/traces.txt"
},
"ut-homo-onephase-50": {
"type": "usage_trace",
"trace": "usage-trace/homo-onephase-50/traces.txt"
},
"ut-homo-twophases-100-10": {
"type": "usage_trace",
"trace": "usage-trace/homo-twophases-100-10/traces.txt"
},
"ut-from-real-trace": {
"type": "usage_trace",
"trace": "usage-trace/from-real-trace/3858728.txt"
}
}
}
3858728_53_1.txt
3858728_53_2.txt
3858728_57_1.txt
3858728_57_2.txt
0 m_usage 0.86 1186
0 m_usage 0.64 469
0 m_usage 0.79 456
0 m_usage 0.84 4643
0 m_usage 0.85 4659
0 m_usage 0.9 1000
0 m_usage 0.83 3614
0 m_usage 0.84 4643
0 m_usage 0.9 933
0 m_usage 0.83 3759
0 m_usage 0.89 1011
0 m_usage 0.83 3614
0 m_usage 0.84 4571
0 m_usage 0.91 923
0 m_usage 0.84 3643
0 m_usage 0.89 1079
0 m_usage 0.83 3614
0 m_usage 0.85 4588
0 m_usage 0.85 4588
0 m_usage 0.91 989
0 m_usage 0.84 3643
0 m_usage 0.85 4659
0 m_usage 0.84 4643
0 m_usage 0.88 3068
0 m_usage 0.79 1519
0 m_usage 0.89 674
1 m_usage 0.98 1041
1 m_usage 0.73 411
1 m_usage 0.88 409
1 m_usage 0.98 2755
1 m_usage 0.88 1364
1 m_usage 1.0 1020
1 m_usage 0.94 3383
1 m_usage 0.99 2424
1 m_usage 0.88 1432
1 m_usage 0.98 2694
1 m_usage 0.89 1416
1 m_usage 1 840
1 m_usage 0.97 2041
1 m_usage 0.89 1281
1 m_usage 0.99 2667
1 m_usage 0.89 1416
1 m_usage 0.99 2667
1 m_usage 0.89 1348
1 m_usage 1 840
1 m_usage 0.98 1898
1 m_usage 0.89 1348
1 m_usage 0.98 2816
1 m_usage 0.88 1364
1 m_usage 0.97 18990
1 m_usage 0.89 1348
1 m_usage 0.99 2727
1 m_usage 0.89 1348
1 m_usage 1.0 600
2 m_usage 0.84 1429
2 m_usage 0.64 469
2 m_usage 0.86 3837
2 m_usage 0.75 960
2 m_usage 0.9 667
2 m_usage 0.86 3070
2 m_usage 0.76 947
2 m_usage 0.86 3698
2 m_usage 0.76 947
2 m_usage 0.86 3767
2 m_usage 0.76 868
2 m_usage 0.86 3837
2 m_usage 0.75 880
2 m_usage 0.86 3698
2 m_usage 0.75 960
2 m_usage 0.86 3628
2 m_usage 0.75 960
2 m_usage 0.86 3698
2 m_usage 0.75 960
2 m_usage 0.86 3767
2 m_usage 0.75 960
2 m_usage 0.85 3741
2 m_usage 0.75 960
2 m_usage 0.86 3698
2 m_usage 0.76 947
2 m_usage 0.87 3724
2 m_usage 0.75 960
2 m_usage 0.87 3655
2 m_usage 0.75 960
2 m_usage 0.91 593
2 m_usage 0.85 3176
2 m_usage 0.75 960
2 m_usage 0.85 3741
2 m_usage 0.76 947
2 m_usage 0.88 682
3 m_usage 0.87 1379
3 m_usage 0.66 455
3 m_usage 0.89 3708
3 m_usage 0.77 935
3 m_usage 0.9 3600
3 m_usage 0.78 923
3 m_usage 0.89 3573
3 m_usage 0.78 923
3 m_usage 0.88 3750
3 m_usage 0.77 779
3 m_usage 0.89 3708
3 m_usage 0.78 846
3 m_usage 0.89 3573
3 m_usage 0.77 935
3 m_usage 0.89 3506
3 m_usage 0.77 935
3 m_usage 0.9 3533
3 m_usage 0.77 935
3 m_usage 0.89 3640
3 m_usage 0.77 935
3 m_usage 0.89 3573
3 m_usage 0.77 935
3 m_usage 0.91 3495
3 m_usage 0.78 923
3 m_usage 0.9 3600
3 m_usage 0.78 923
3 m_usage 0.9 3533
3 m_usage 0.78 923
3 m_usage 0.9 3600
3 m_usage 0.78 923
3 m_usage 0.89 3573
3 m_usage 0.77 935
3 m_usage 0.91 659