Une MAJ de sécurité est nécessaire sur notre version actuelle. Elle sera effectuée lundi 02/08 entre 12h30 et 13h. L'interruption de service devrait durer quelques minutes (probablement moins de 5 minutes).

Commit b6e0a43e authored by MOMMESSIN Clement
Browse files

[code] Handle machine failure/restore events

parent 08d1c4ac
......@@ -65,6 +65,7 @@ class Batsim(object):
self.jobs_manually_changed = set()
self.no_more_static_jobs = False
self.no_more_external_events = False
self.scheduler.bs = self
# import pdb; pdb.set_trace()
......@@ -448,7 +449,7 @@ class Batsim(object):
res_key = "resources_data"
else:
res_key = "compute_resources"
self.resources = {
self.compute_resources = {
res["id"]: res for res in event_data[res_key]}
self.storage_resources = {
res["id"]: res for res in event_data["storage_resources"]}
......@@ -585,7 +586,16 @@ class Batsim(object):
elif event_type == "NOTIFY":
notify_type = event_data["type"]
if notify_type == "no_more_static_job_to_submit":
self.no_more_static_jobs = True
self.scheduler.onNoMoreJobsInWorkloads()
elif notify_type == "no_more_external_event_to_occur":
self.no_more_external_events = True
self.scheduler.onNoMoreExternalEvents()
elif notify_type == "event_machine_unavailable":
self.scheduler.onNotifyEventMachineUnavailable(ProcSet.from_str(event_data["resources"]))
elif notify_type == "event_machine_available":
self.scheduler.onNotifyEventMachineAvailable(ProcSet.from_str(event_data["resources"]))
else:
raise Exception("Unknown event type {}".format(event_type))
......@@ -770,9 +780,17 @@ class BatsimScheduler(object):
raise NotImplementedError()
def onNoMoreJobsInWorkloads(self):
self.bs.no_more_static_jobs = True
self.logger.info("There is no more static jobs in the workoad")
def onNoMoreExternalEvents(self):
self.logger.info("There is no more external events to occur")
def onNotifyEventMachineUnavailable(self, machines):
raise NotImplementedError()
def onNotifyEventMachineAvailable(self, machines):
raise NotImplementedError()
def onBeforeEvents(self):
pass
......
from batsim.batsim import BatsimScheduler, Batsim, Job
import sys
import os
import logging
from procset import ProcSet
from itertools import islice
class FillerSchedWithEvents(BatsimScheduler):
    """Greedy "filler" scheduler that reacts to machine failure/restore events.

    Every pending job that fits in the currently free machines is started
    on the first free machines. Machines reported as unavailable are
    withheld from allocation until they are reported available again.

    State (created in ``onSimulationBegins``):
        openJobs: set of submitted jobs that have not been started yet.
        availableResources: machines that are both usable and idle.
        unavailableResources: machines currently reported as unavailable.
        busyResources: machines held by jobs that were started and have not
            completed yet.
    """

    def __init__(self, options):
        super().__init__(options)

    def onSimulationBegins(self):
        """Initialise the bookkeeping; every compute resource starts free."""
        self.nb_completed_jobs = 0
        self.jobs_completed = []
        self.jobs_waiting = []

        # Simulated time charged for each scheduling pass.
        self.sched_delay = 0.005

        self.openJobs = set()
        self.availableResources = ProcSet(
            (0, self.bs.nb_compute_resources - 1))
        self.unavailableResources = ProcSet()
        self.busyResources = ProcSet()

    def scheduleJobs(self):
        """Start every pending job that fits in the free machines.

        Each selected job receives the first ``requested_resources`` machines
        of ``availableResources``. The scheduling delay is then consumed and
        the selected jobs (if any) are handed to Batsim for execution.
        """
        scheduledJobs = []

        # Iterate over a snapshot so that jobs can be removed from openJobs
        # while traversing. NOTE: set iteration order is arbitrary, so the
        # order in which pending jobs are considered is not defined.
        for job in set(self.openJobs):
            nb_res_req = job.requested_resources
            if nb_res_req > len(self.availableResources):
                continue

            # Take the *nb_res_req* first available machines.
            job_alloc = ProcSet(*islice(self.availableResources, nb_res_req))
            job.allocation = job_alloc
            scheduledJobs.append(job)

            self.availableResources -= job_alloc
            self.busyResources |= job_alloc
            self.openJobs.remove(job)

        # Account for the time spent taking the decision.
        self.bs.consume_time(self.sched_delay)

        # Only send an execution request when something was scheduled.
        if scheduledJobs:
            self.bs.execute_jobs(scheduledJobs)

    def onJobSubmission(self, job):
        """Queue a newly submitted job; it is scheduled in onNoMoreEvents."""
        self.openJobs.add(job)

    def onJobCompletion(self, job):
        """Release the machines of a finished job.

        Machines that are currently unavailable are not returned to the free
        pool; they come back through ``onNotifyEventMachineAvailable``.
        """
        self.busyResources -= job.allocation
        self.availableResources |= job.allocation - self.unavailableResources

    def onNotifyEventMachineUnavailable(self, machines):
        """Withdraw ``machines`` (a ProcSet) from the allocatable pool."""
        self.unavailableResources |= machines
        self.availableResources -= machines

    def onNotifyEventMachineAvailable(self, machines):
        """Make restored ``machines`` (a ProcSet) allocatable again.

        Nothing in this scheduler stops a job when its machine is reported
        unavailable, so a machine may be restored while a job still holds it.
        Such machines are left out here and are released by
        ``onJobCompletion``; otherwise they could be handed to a second job.
        """
        self.unavailableResources -= machines
        self.availableResources |= machines - self.busyResources

    def onNoMoreEvents(self):
        """Run one scheduling pass.

        NOTE(review): presumably called by the Batsim wrapper once all events
        of the current message have been dispatched - confirm against
        batsim.batsim.
        """
        self.scheduleJobs()
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment