""" File: monitor_igrida.py Author: Kwon-Young Choi Email: kwon-young.choi@irisa.fr Date: 2020-04-24 Description: Monitor jobs on igrida. This scripts does the same jobs as `oarstat` but can gather more useful information such as the gpu model for a job or the name of the node which runs the job. Try `oarstat -f -j JOB_ID` for a complete list of information that you can gather for a job. You can also use `oarstat -p -j JOB_ID` to gather additional properties for running jobs. """ import argparse from subprocess import run, PIPE import shlex import time import json import pandas as pd def run_command(command): command = shlex.split(command) output = run(command, stdout=PIPE) try: output = json.loads(output.stdout) except json.decoder.JSONDecodeError: print(f"Empty results for `{' '.join(command)}`") output = {} return output def get_job_array_info(job_array_id): command = 'oarstat -J --array {} -f'.format(job_array_id) return run_command(command) def get_job_info(job_array_id): command = 'oarstat -J -j {} -f'.format(job_array_id) return run_command(command) def get_user_job_info(user): command = 'oarstat -J -f -u {}'.format(user) return run_command(command) def get_job_property(job_id): command = f'oarstat -j {job_id} -p' command = shlex.split(command) output = run(command, stdout=PIPE) output = output.stdout.decode() if output == '': return {} output = output.splitlines()[0] key_values = output.split(',') data = {} for key_value in key_values: key_value = key_value.strip() key, value = key_value.split('=') value = value.strip().strip("'") data[key.strip()] = value return data def get_Durations(job_data): for id, data in job_data.items(): start = data['startTime'] end = data['stopTime'] if end == 0 and start != 0: end = int(time.time()) duration = end - start job_data[id]['Duration'] = duration return job_data prop_func_map = { "Duration": get_Durations, } def print_data(job_data, properties): df = pd.DataFrame.from_dict(job_data, orient='index') df['Duration'] = df['Duration'].map( lambda x: f'{int(x / 3600):02}:{int(x / 60) % 60:02}:{x % 60:03}') df['command'] = df['command'].map(lambda x: x.split()[1:]) properties = df.columns.intersection(properties).sort_values() df = df[properties] df.index.name = "Job_Id" print(df) return def main(args): job_data = {} for user in args.user: job_data.update(get_user_job_info(user)) for job_id in args.array: job_data.update(get_job_array_info(job_id)) for job_id in args.job: job_data.update(get_job_info(job_id)) for job_id in job_data: job_data[job_id].update(get_job_property(job_id)) job_data = get_Durations(job_data) default_prop = [ "array_index", "state", "job_user", "Duration", "message", ] default_prop.extend(args.properties) print_data(job_data, default_prop) return if __name__ == "__main__": parser = argparse.ArgumentParser( description="""Monitor jobs on igrida. This scripts does the same jobs as `oarstat` but can gather more useful information such as the gpu model for a job or the name of the node which runs the job. Try `oarstat -f -j JOB_ID` for a complete list of information that you can gather for a job. You can also use `oarstat -p -j JOB_ID` to gather additional properties for running jobs.""", usage="python monitor_igrida.py -j JOB_ID -a JOB_ARRAY_ID -p gpu_model" "-p command") parser.add_argument("-u", "--user", action='store', help="user", type=str, nargs='*', default=[]) parser.add_argument("-j", "--job", action='store', help="job id", type=int, nargs='*', default=[]) parser.add_argument("-a", "--array", action='store', help="job array id", type=int, nargs='*', default=[]) parser.add_argument("-p", "--properties", help="additional job properties", action='store', type=str, nargs='*', default=[]) args = parser.parse_args() main(args)