Commit ab3e0285 authored by S. Lackner

[sched] Write an additional log file with more information than the Batsim log...

[sched] Write an additional log file with more information than the Batsim log file (e.g. information about the parent job)
parent 01cef3c8
......@@ -14,6 +14,8 @@ class Batsim(object):
DYNAMIC_JOB_PREFIX = "dynamic_job"
DYNAMIC_PROFILE_PREFIX = "dynamic_profile"
WORKLOAD_JOB_SEPARATOR = "!"
WORKLOAD_JOB_SEPARATOR_REPLACEMENT = "%"
def __init__(self, scheduler,
validatingmachine=None,
......@@ -178,16 +180,18 @@ class Batsim(object):
if workload_name is None:
workload_name=Batsim.DYNAMIC_JOB_PREFIX
workload_name = workload_name.replace("!", "%")
workload_name = workload_name.replace(Batsim.WORKLOAD_JOB_SEPARATOR, Batsim.WORKLOAD_JOB_SEPARATOR_REPLACEMENT)
job_id = self.dynamic_id_counter.setdefault(workload_name, 0)
self.dynamic_id_counter[workload_name] += 1
full_job_id = "{}!{}".format(workload_name, job_id)
full_job_id = str(workload_name) + Batsim.WORKLOAD_JOB_SEPARATOR + str(job_id)
if profile_name is None:
profile_name = "{}!{}".format(
Batsim.DYNAMIC_PROFILE_PREFIX, full_job_id)
profile_name = (
Batsim.DYNAMIC_PROFILE_PREFIX
+ Batsim.WORKLOAD_JOB_SEPARATOR
+ str(full_job_id))
msg = {
"timestamp": self.time(),
......
......@@ -63,6 +63,8 @@ class Job:
self._profile = None
self._comment = None
def __setattr__(self, field, value):
object.__setattr__(self, field, value)
try:
......@@ -112,6 +114,16 @@ class Job:
"""The starting time of this job."""
return self._start_time
@property
def comment(self):
    """Free-form text attached to this job.

    The value is emitted in the `comment` column of the scheduled-jobs
    log, so it can carry extra per-job data into that file.
    """
    return self._comment

@comment.setter
def comment(self, value):
    """Replace the text attached to this job."""
    self._comment = value
@property
def dependencies(self):
"""The dependencies of this job.
......@@ -206,6 +218,18 @@ class Job:
return self.state == BatsimJob.State.RUNNING or (
not self.open and self.scheduled and not self.completed)
@property
def success(self):
    """True when the job's state is `COMPLETED_SUCCESSFULLY`."""
    finished_ok = BatsimJob.State.COMPLETED_SUCCESSFULLY
    return self.state == finished_ok
@property
def failure(self):
    """True when the job's state is `COMPLETED_KILLED` or `COMPLETED_FAILED`."""
    failed_states = (
        BatsimJob.State.COMPLETED_KILLED,
        BatsimJob.State.COMPLETED_FAILED,
    )
    return self.state in failed_states
@property
def allocation(self):
"""Returns the current allocation of this job."""
......@@ -418,6 +442,8 @@ class Job:
self._scheduler.info(
"Rejecting job ({job}), reason={reason}",
job=self, reason=self.rejected_reason, type="job_rejection")
self._scheduler._log_job(
self._scheduler.time, self, "rejected", reason)
self._scheduler._batsim.reject_jobs([self._batsim_job])
del self._scheduler._scheduler._jobmap[self._batsim_job.id]
......@@ -519,18 +545,20 @@ class Job:
self._batsim_job.finish_time = self._scheduler.time
self._batsim_job.kill_reason = kill_reason
self._batsim_job.return_code = return_code or 0 if state == Job.State.COMPLETED_SUCCESSFULLY else 1
self._scheduler._log_job(self._scheduler.time, self, "completed")
self._jobs_list.update_element(self)
def __str__(self):
return (
"<Job {}; sub:{} reqtime:{} res:{} prof:{} start:{} fin:{} stat:{} killreason:{} ret:{}>"
"<Job {}; sub:{} reqtime:{} res:{} prof:{} start:{} fin:{} stat:{} killreason:{} ret:{} comment:{}>"
.format(
self.id, self.submit_time, self.requested_time,
self.requested_resources, self.profile,
self.start_time,
self.finish_time, self.state,
self.kill_reason,
self.return_code))
self.return_code,
self.comment))
@classmethod
def create_dynamic_job(cls, *args, **kwargs):
......
......@@ -83,6 +83,7 @@ class BaseBatsimScheduler(BatsimScheduler):
for job in jobobjs:
job._do_complete_job()
self._scheduler._log_job(self._scheduler.time, job, "killed")
self._scheduler.on_jobs_killed(jobobjs)
self._scheduler._do_schedule()
......@@ -124,6 +125,7 @@ class BaseBatsimScheduler(BatsimScheduler):
self._scheduler.info("Job has completed its execution ({job})",
job=jobobj, type="job_completion_received")
self._scheduler._log_job(self._scheduler.time, jobobj, "completed")
jobobj._do_complete_job()
......@@ -282,6 +284,24 @@ class Scheduler(metaclass=ABCMeta):
handler.setFormatter(formatter)
self._event_logger.addHandler(handler)
# Add the logger with information about scheduled jobs.
self._sched_jobs_logger = logging.getLogger(
self.__class__.__name__ + "SchedJobs")
self._sched_jobs_logger.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(message)s')
filename_sched_jobs = "{}_sched_jobs.csv".format(
export_prefix)
try:
os.remove(filename_sched_jobs)
except OSError:
pass
handler = logging.FileHandler(filename_sched_jobs)
handler.setLevel(logging.DEBUG)
handler.setFormatter(formatter)
self._sched_jobs_logger.addHandler(handler)
self._log_job_header()
@property
def events(self):
"""The events happened in the scheduler."""
......@@ -360,6 +380,64 @@ class Scheduler(metaclass=ABCMeta):
return str(event)
def _log_job_header(self):
header = [
"time",
"full_job_id",
"workload_name",
"job_id",
"full_parent_job_id",
"parent_workload_name",
"parent_job_id",
"submission_time",
"requested_number_of_processors",
"requested_time",
"success",
"starting_time",
"finish_time",
"comment",
"type",
"reason"
]
self._sched_jobs_logger.info(";".join([str(i) for i in header]))
def _log_job(
        self,
        time,
        job,
        type_of_completion,
        reason_for_completion=""):
    """Append one row describing `job` to the scheduled-jobs log.

    The row is a single semicolon-separated line whose fields are, in
    order: time, full job id, workload name, job id, full parent job id,
    parent workload name, parent job id, submit time, requested
    resources, requested time, success flag (1/0), start time, finish
    time, comment, `type_of_completion` and `reason_for_completion`.

    Args:
        time: Timestamp written in the first column.
        job: The job to describe; its `id`, `parent_job`, `submit_time`,
            `requested_resources`, `requested_time`, `success`,
            `start_time`, `finish_time` and `comment` attributes are read.
        type_of_completion: Label written to the `type` column.
        reason_for_completion: Optional text for the `reason` column.
    """
    # Imported locally so the method does not rely on the module's
    # import block providing these names.
    import csv
    import io

    separator = Batsim.WORKLOAD_JOB_SEPARATOR

    full_parent_job_id = ""
    parent_workload_name = ""
    parent_job_id = ""
    if job.parent_job:
        full_parent_job_id = job.parent_job.id
        # partition() never raises: an id without the separator yields an
        # empty job-id part instead of an IndexError.
        parent_workload_name, _, parent_job_id = (
            full_parent_job_id.partition(separator))

    workload_name, _, job_number = job.id.partition(separator)

    row = [
        time,
        job.id,
        workload_name,
        job_number,
        full_parent_job_id,
        parent_workload_name,
        parent_job_id,
        job.submit_time,
        job.requested_resources,
        job.requested_time,
        1 if job.success else 0,
        job.start_time,
        job.finish_time,
        job.comment or "",
        type_of_completion,
        reason_for_completion,
    ]

    # Stringify first so that None is written as "None" (csv would write
    # it as an empty field), then let csv quote any field that contains
    # the delimiter, a quote character or a line break.
    buffer = io.StringIO()
    csv.writer(buffer, delimiter=";").writerow(
        [str(field) for field in row])
    self._sched_jobs_logger.info(buffer.getvalue().rstrip("\r\n"))
def debug(self, msg, **kwargs):
    """Log `msg` at debug level.

    `msg` and `kwargs` are first passed through `_format_log_msg`; the
    result is what gets handed to the logger.
    """
    formatted = self._format_log_msg(msg, **kwargs)
    self._logger.debug(formatted)
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment