Commit 5598e167 authored by Jérémie Gaidamour's avatar Jérémie Gaidamour
Browse files

[dev] run-g5kchecks implemented -s, -c, -n with oar reservations

parent a2df87c8
...@@ -8,9 +8,10 @@ ...@@ -8,9 +8,10 @@
# - If an output YAML file already exist in ouput/, the execution of g5k-check on the corresponding node is skipped. # - If an output YAML file already exist in ouput/, the execution of g5k-check on the corresponding node is skipped.
# - Use postprocessing.rb for moving the file in th input/ directory. This script also edits some keys of the YAML files. # - Use postprocessing.rb for moving the file in th input/ directory. This script also edits some keys of the YAML files.
require 'optparse'
require 'fileutils'
require 'cute' require 'cute'
require 'peach' require 'peach'
require 'fileutils'
require '../lib/input_loader' require '../lib/input_loader'
require 'pp' require 'pp'
...@@ -61,7 +62,7 @@ OptionParser.new do |opts| ...@@ -61,7 +62,7 @@ OptionParser.new do |opts|
opts.separator "" opts.separator ""
opts.separator "Node reservation options:" opts.separator "Node reservation options:"
opts.on('-q', '--queue', 'Specify an OAR reservation queue') do |q| opts.on('-qQUEUE', '--queue=queue', String, 'Specify an OAR reservation queue') do |q|
options[:queue] = q options[:queue] = q
end end
...@@ -112,8 +113,6 @@ puts "Options: #{options}" if options[:verbose] ...@@ -112,8 +113,6 @@ puts "Options: #{options}" if options[:verbose]
# #
# #
$jobs = [] # list of OAR reservation
FileUtils::mkdir_p("output/") FileUtils::mkdir_p("output/")
def run_g5kcheck(site_uid, fnode_uid, options) def run_g5kcheck(site_uid, fnode_uid, options)
...@@ -139,7 +138,9 @@ end ...@@ -139,7 +138,9 @@ end
def oarsub(site_uid, resources, queue) def oarsub(site_uid, resources, queue)
begin begin
$jobs << $g5k.reserve(:site => site_uid, :resources => resources, :walltime => '00:30:00', :wait => false, :queue => queue)
$g5k.reserve(:site => site_uid, :resources => resources, :walltime => '00:30:00', :wait => false, :queue => queue)
rescue Exception => e rescue Exception => e
puts "#{site_uid}: Error during the reservation '#{resources}' at #{site_uid} - #{e.class}: #{e.message}" puts "#{site_uid}: Error during the reservation '#{resources}' at #{site_uid} - #{e.class}: #{e.message}"
end end
...@@ -176,7 +177,7 @@ if options[:force] ...@@ -176,7 +177,7 @@ if options[:force]
end end
nodes_status = $g5k.nodes_status(site_uid) if nodes_status.nil? nodes_status = $g5k.nodes_status(site_uid) if nodes_status.nil?
if prompt != 'yes-all' && nodes_status["#{node_uid}.#{site_uid}.grid5000.fr"] && nodes_status["#{fnode_uid}"] == "busy" if prompt != 'yes-all' && nodes_status[fnode_uid] && nodes_status[fnode_uid] == "busy"
if prompt != 'no-all' if prompt != 'no-all'
printf "#{site_uid} - #{node_uid} is busy (ie. there is currently an OAR reservation. Run g5k-checks on reserved nodes ? (y/yes-all/no-all/N) " printf "#{site_uid} - #{node_uid} is busy (ie. there is currently an OAR reservation. Run g5k-checks on reserved nodes ? (y/yes-all/no-all/N) "
prompt = STDIN.gets.chomp prompt = STDIN.gets.chomp
...@@ -197,11 +198,111 @@ if options[:force] ...@@ -197,11 +198,111 @@ if options[:force]
} }
} }
else # options[:force] else # ! options[:force]
puts 'Temporarily disabled. Use --force' options[:sites].peach { |site_uid|
exit
end jobs = [] # list of OAR reservation
begin
#
# Node reservation
#
nodes_status = $g5k.nodes_status(site_uid)
if options[:nodes]
# Reserve nodes one by one
options[:nodes].each { |node_uid| jobs << oarsub(site_uid, "{host='#{node_uid.split('.')[0]}'}", options[:queue]) }
else
clusters = $g5k.cluster_uids(site_uid)
# Reserve as many free node as possible in one reservation
if options[:clusters]
options[:clusters].each { |cluster_uid|
jobs << oarsub(site_uid, "{cluster='#{cluster_uid}'}/nodes=BEST", options[:queue]) if clusters.include?(cluster_uid)
}
else
jobs << oarsub(site_uid, "nodes=BEST", options[:queue])
end
# Reserve busy nodes one by one
$g5k.nodes_status(site_uid).each { |fnode_uid, status|
cluster_uid = fnode_uid.split(/-/).first
next if options[:clusters] && ! options[:clusters].include?(cluster_uid)
next if File.exist?("output/#{fnode_uid}.yaml") # skip reservation if we alread have the node info
next if status != "busy" # only busy nodes
jobs << oarsub(site_uid, "{host='#{fnode_uid}'}", options[:queue])
}
end
#
# Process running jobs
#
released_jobs = {};
loop do
waiting_jobs = $g5k.get_my_jobs(site_uid, state='waiting')
running_jobs = $g5k.get_my_jobs(site_uid, state='running')
launching_jobs = $g5k.get_my_jobs(site_uid, state='launching')
puts "#{site_uid}: Running: #{running_jobs.size} - Waiting: #{waiting_jobs.size} - Launching: #{launching_jobs.size}"
running_jobs.each { |job|
job_uid = job['uid']
next unless jobs.any? { |j| j['uid'] == job_uid } # skip reservations that are not related to this script
if released_jobs[job_uid]
puts "#{site_uid}: #{job_uid} already processed" # AOR job deletions can take some times
next
end
puts "#{site_uid}: Processing #{job_uid}"
job['assigned_nodes'].peach(10) { |fnode_uid|
next if File.exist?("output/#{fnode_uid}.yaml")
run_g5kcheck(site_uid, fnode_uid, options)
}
puts "#{site_uid}: Release #{job_uid}"
begin
$g5k.release(job)
released_jobs[job_uid] = true
rescue Exception => e
puts "#{site_uid}: Error while releasing job #{job_uid} - #{e.class}: #{e.message}"
end
}
# Stop when there isn't any job left
break if running_jobs.empty? and waiting_jobs.empty? and launching_jobs.empty?
# Wait a little bit when the previous loop iteration did not find any job to process
sleep(5) if running_jobs.empty?
end
rescue Exception => e
puts "#{e.class}: #{e.message}"
ensure
jobs.each { |job|
puts "Release job #{job['links'][0]['href']}"
$g5k.release(job)
} if jobs.size > 0
end # begin/rescue/ensure
} # options[:sites].each
end # options[:force]
`ruby postprocessing.rb` `ruby postprocessing.rb`
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment