Commit 96f3bee9 authored by Jérémie Gaidamour's avatar Jérémie Gaidamour

[dev] run-g5kchecks.rb

parent 219eba55
require 'cute'
require 'peach'
require 'fileutils'
require 'pp'
options ||= {}
options[:ssh] ||= {}
options[:ssh][:user] = 'g5kadmin' unless options[:ssh][:user]
options[:ssh][:host] = '%s.g5kadmin' unless options[:ssh][:host]
options[:ssh][:params] ||= {:timeout => 10}
options[:queue] = 'admin'
options[:queue] = 'default'
jobs = []
FileUtils::mkdir_p("output/")
begin
g5k = Cute::G5K::API.new()
g5k.site_uids().peach { |site_uid|
#
# Node reservation
#
# Reserve as many free node as possible in one reservation
begin
jobs << g5k.reserve(:site => site_uid, :resources => "nodes=BEST", :walltime => '00:30:00', :wait => false, :queue => options[:queue])
rescue Exception => e
puts "#{site_uid}: Error during the reservation nodes=BEST"
end
# Reserve busy nodes one by one
g5k.nodes_status(site_uid).each { |node_uid, status|
next if File.exist?("output/#{node_uid}.yaml") # skip reservation if we alread have the node info
next if status != "busy" # only busy nodes
begin
jobs << g5k.reserve(:site => site_uid, :resources => "{host='#{node_uid}'}", :walltime => '00:30:00', :wait => false, :queue => options[:queue])
rescue Exception => e
puts "#{site_uid}: Error during the reservation of #{node_uid} - #{e.class}: #{e.message}"
end
}
#
# Process running jobs
#
released_jobs = {};
loop do
waiting_jobs = g5k.get_my_jobs(site_uid, state='waiting')
running_jobs = g5k.get_my_jobs(site_uid, state='running')
launching_jobs = g5k.get_my_jobs(site_uid, state='launching')
puts "#{site_uid}: Running: #{running_jobs.size} - Waiting: #{waiting_jobs.size} - Launching: #{launching_jobs.size}"
running_jobs.each { |job|
job_uid = job['uid']
if released_jobs[job_uid]
puts "#{site_uid}: #{job_uid} already processed"
next
end
puts "#{site_uid}: Processing #{job_uid}"
job['assigned_nodes'].peach(10) { |node_uid|
next if File.exist?("output/#{node_uid}.yaml")
puts "#{site_uid}: Processing #{job_uid} - #{node_uid}"
begin
Net::SSH.start(options[:ssh][:host].gsub("%s", site_uid), options[:ssh][:user], options[:ssh][:params]) { |ssh|
output1 = ssh.exec!("sudo ssh -o StrictHostKeychecking=no root@#{node_uid} '/usr/bin/g5k-checks -m api'")
output2 = ssh.exec!("sudo ssh -q -o StrictHostKeychecking=no root@#{node_uid} 'cat /tmp/#{node_uid}.yaml'")
File.open("output/#{node_uid}.yaml", 'w') do |f|
f.write(output2)
end
}
rescue Exception => e
puts "#{site_uid}: Error while processing #{job_uid} - #{node_uid} - #{e.class}: #{e.message}"
end
}
puts "#{site_uid}: Release #{job_uid}"
begin
g5k.release(job)
released_jobs[job_uid] = true
rescue Exception => e
puts "#{site_uid}: Error while releasing job #{job_uid} - #{e.class}: #{e.message}"
end
}
# Stop when there isn't any job left
break if running_jobs.empty? and waiting_jobs.empty? and launching_jobs.empty?
# Wait a little bit when the previous loop iteration did not find any job to process
sleep(5) if running_jobs.empty?
end
}
rescue Exception => e
puts "#{e.class}: #{e.message}"
ensure
jobs.each { |job|
puts "Release job #{job['links'][0]['href']}"
g5k.release(job)
}
end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment