Mentions légales du service

Skip to content
Snippets Groups Projects
Commit b7b995f2 authored by Alexandre MERLIN's avatar Alexandre MERLIN
Browse files

[gen/oar-properties] autofix with rubocop

parent 2c8e254b
No related branches found
No related tags found
No related merge requests found
Pipeline #948731 failed
# coding: utf-8
require 'hashdiff'
require 'refrepo/data_loader'
require 'net/ssh'
require 'refrepo/gpu_ref'
# TODO for gpu_model (and others?) use NULL instead of empty string
# TODO: for gpu_model (and others?) use NULL instead of empty string
# Raised when a node description in the reference repository lacks a
# property that is needed to build the corresponding OAR resource.
class MissingProperty < StandardError; end
......@@ -16,101 +14,97 @@ MiB = 1024**2
############################################
module OarProperties
# OAR API data cache
@@oar_data = {}
# Builds an ASCII row separator such as "+----+-----+" from the
# column-width hash (values are the widths of each column).
def table_separator(cols)
  segments = cols.map { |_name, width| '-' * width }
  "+-#{segments.join('-+-')}-+"
end
def display_resources_table(generated_hierarchy)
cols = { cluster: 11, host: 20, cpu: 5, core: 5, cpuset: 6, cpumodel: 25, gpu: 4, gpudevice: 10, gpumodel: 30 }
puts table_separator(cols)
puts "| #{cols.map { |k, v| k.to_s.ljust(v) }.join(' | ')} |"
# OAR API data cache
@@oar_data = {}
# Builds an ASCII row separator such as "+----+-----+" from the
# column-width hash (values are the widths of each column).
def table_separator(cols)
  # Each column contributes <width> dashes plus one dash of padding on
  # each side; columns are joined with '+'.
  "+-#{cols.map { |_k, v| '-' * v }.join('-+-')}-+"
end
def display_resources_table(generated_hierarchy)
cols = {cluster: 11, host: 20, cpu: 5, core: 5, cpuset: 6, cpumodel: 25, gpu: 4, gpudevice: 10, gpumodel: 30}
puts table_separator(cols)
puts "| #{cols.map{|k, v| k.to_s.ljust(v) }.join(' | ')} |"
host_prev = ""
generated_hierarchy[:nodes].map{|node| node[:oar_rows]}.flatten.each do |row|
if row[:host] != host_prev
puts table_separator(cols)
host_prev = row[:host]
host_prev = ''
generated_hierarchy[:nodes].map { |node| node[:oar_rows] }.flatten.each do |row|
if row[:host] != host_prev
puts table_separator(cols)
host_prev = row[:host]
end
puts "| #{cols.map { |k, v| row[k].to_s.ljust(v) }.join(' | ')} |"
end
puts "| #{cols.map{|k, v| row[k].to_s.ljust(v)}.join(' | ')} |"
puts table_separator(cols)
end
puts table_separator(cols)
end
############################################
# Functions related to the "PRINT" operation
############################################
############################################
# Functions related to the "PRINT" operation
############################################
# Generates an ASCII separator
def generate_separators
  # Shell command echoing a "=" rule line, newline-terminated.
  %(echo '================================================================================'\n)
end
# Builds the shell snippet that creates the OAR 'disk' resource +disk+ on
# +host+, guarded so the creation is skipped when it already exists.
def generate_create_disk_cmd(host, disk)
  exist_check = "disk_exist '#{host}' '#{disk}'"
  lines = []
  lines << "echo; echo 'Adding disk #{disk} on host #{host}:'"
  lines << "#{exist_check} && echo '=> disk already exists'"
  lines << "#{exist_check} || oarnodesetting -a -h '' -p host='#{host}' -p type='disk' -p disk='#{disk}'"
  "#{lines.join("\n")}\n\n"
end
# Generates an ASCII separator
# Generates an ASCII separator: a shell command echoing a "=" rule line,
# newline-terminated.
def generate_separators
  command = "echo '================================================================================'"
  command + "\n"
end
# Builds the shell snippet that creates the OAR 'disk' resource +disk+ on
# +host+, guarded so the creation is skipped when it already exists.
def generate_create_disk_cmd(host, disk)
  disk_exist = "disk_exist '#{host}' '#{disk}'"
  command = "echo; echo 'Adding disk #{disk} on host #{host}:'\n"
  command += "#{disk_exist} && echo '=> disk already exists'\n"
  command += "#{disk_exist} || oarnodesetting -a -h '' -p host='#{host}' -p type='disk' -p disk='#{disk}'"
  command + "\n\n"
end
def generate_set_node_properties_cmd(host, default_properties)
if not default_properties.nil?
def generate_set_node_properties_cmd(host, default_properties)
if default_properties.nil?
return "echo ; echo 'Not setting properties for #{host}: node is not available in ref-api'; echo\n\n"
end
return '' if default_properties.size == 0
command = "echo; echo 'Setting properties for #{host}:'; echo\n"
command += "oarnodesetting --sql \"host='#{host}' and type='default'\" -p "
command += properties_internal(default_properties)
return command + "\n\n"
else
return "echo ; echo 'Not setting properties for #{host}: node is not available in ref-api'; echo\n\n"
command + "\n\n"
end
end
# Builds the oarnodesetting command that applies +disk_properties+ to the
# OAR 'disk' resource +disk+ of +host+. Returns '' when there is nothing
# to set.
def generate_set_disk_properties_cmd(host, disk, disk_properties)
  return '' if disk_properties.empty?

  command = "echo; echo 'Setting properties for disk #{disk} on host #{host}:'; echo\n"
  command += "oarnodesetting --sql \"host='#{host}' and type='disk' and disk='#{disk}'\" -p "
  command += disk_properties_internal(disk_properties)
  command + "\n\n"
end
def generate_create_oar_property_cmd(properties_keys)
command = ''
ignore_keys_list = ignore_default_keys()
properties_keys.each do |key, key_type|
if ignore_keys_list.include?(key)
next
end
# keys such as deploy or besteffort are default OAR keys that should not be created
if oar_system_keys.include?(key)
next
end
if key_type == Integer
command += "property_exist '#{key}' || oarproperty -a #{key}\n"
elsif key_type == String
command += "property_exist '#{key}' || oarproperty -a #{key} --varchar\n"
else
raise "Error: the type of the '#{key}' property is unknown (Integer/String). Cannot generate the corresponding 'oarproperty' command. You must create this property manually ('oarproperty -a #{key} [--varchar]')"
end
# Builds the oarnodesetting command that applies +disk_properties+ to the
# OAR 'disk' resource +disk+ of +host+. Returns '' when there is nothing
# to set.
def generate_set_disk_properties_cmd(host, disk, disk_properties)
  # Prefer empty? over size == 0 (Style/ZeroLengthPredicate).
  return '' if disk_properties.empty?

  command = "echo; echo 'Setting properties for disk #{disk} on host #{host}:'; echo\n"
  command += "oarnodesetting --sql \"host='#{host}' and type='disk' and disk='#{disk}'\" -p "
  command += disk_properties_internal(disk_properties)
  command + "\n\n"
end
return command
end
# Emits one guarded 'oarproperty -a' command per property key, skipping
# ignored keys and OAR built-in keys. +properties_keys+ maps each key to
# its detected type (Integer or String); any other type raises.
def generate_create_oar_property_cmd(properties_keys)
  excluded = ignore_default_keys
  properties_keys.each_with_object(String.new) do |(key, key_type), script|
    next if excluded.include?(key)
    # keys such as deploy or besteffort are default OAR keys that should not be created
    next if oar_system_keys.include?(key)

    if key_type == Integer
      script << "property_exist '#{key}' || oarproperty -a #{key}\n"
    elsif key_type == String
      script << "property_exist '#{key}' || oarproperty -a #{key} --varchar\n"
    else
      raise "Error: the type of the '#{key}' property is unknown (Integer/String). Cannot generate the corresponding 'oarproperty' command. You must create this property manually ('oarproperty -a #{key} [--varchar]')"
    end
  end
end
# Generates helper functions:
# - property_exist : check if a property exists
# - node_exist : check if a node exists
# - disk_exist : check if a disk exists
#
# and variables which help to add new resources:
# - NEXT_AVAILABLE_CPU_ID : the next identifier that can safely be used for a new cpu
# - NEXT_AVAILABLE_CORE_ID : the next identifier that can safely be used for a new core
def generate_oar_commands_header()
return %Q{
# Generates helper functions:
# - property_exist : check if a property exists
# - node_exist : check if a node exists
# - disk_exist : check if a disk exists
#
# and variables which help to add new resources:
# - NEXT_AVAILABLE_CPU_ID : the next identifier that can safely be used for a new cpu
# - NEXT_AVAILABLE_CORE_ID : the next identifier that can safely be used for a new core
def generate_oar_commands_header
%{
#! /usr/bin/env bash
set -eu
......@@ -146,20 +140,18 @@ disk_exist () {
# let "NEXT_AVAILABLE_CORE_ID=MAX_CORE_ID+1"
# fi
}
end
# Ensures that all required OAR properties exists.
# OAR properties can be divided in two sets:
# - properties that were previously created by 'oar_resources_add'
# - remaining properties that can be generated from API
def generate_oar_property_creation(site_name, data_hierarchy)
end
#############################################
# Create properties that were previously created
# by 'oar_resources_add'
##############################################
commands = %Q{
# Ensures that all required OAR properties exists.
# OAR properties can be divided in two sets:
# - properties that were previously created by 'oar_resources_add'
# - remaining properties that can be generated from API
def generate_oar_property_creation(site_name, data_hierarchy)
#############################################
# Create properties that were previously created
# by 'oar_resources_add'
##############################################
commands = %(
#############################################
# Create OAR properties that were created by 'oar_resources_add'
#############################################
......@@ -171,385 +163,405 @@ property_exist 'gpu' || oarproperty -a gpu
property_exist 'disk' || oarproperty -a disk --varchar
property_exist 'diskpath' || oarproperty -a diskpath --varchar
}
)
#############################################
# Create remaining properties (from API)
#############################################
commands += %Q{
#############################################
# Create remaining properties (from API)
#############################################
commands += %(
#############################################
# Create OAR properties if they don't exist
#############################################
}
# Fetch oar properties from ref repo
# global_hash = load_data_hierarchy
global_hash = data_hierarchy
site_properties = get_oar_properties_from_the_ref_repo(global_hash, {
:sites => [site_name]
})[site_name]
property_keys = get_property_keys(site_properties)
# Generate OAR commands for creating properties
commands += generate_create_oar_property_cmd(property_keys)
return commands
end
)
# Fetch oar properties from ref repo
# global_hash = load_data_hierarchy
global_hash = data_hierarchy
site_properties = get_oar_properties_from_the_ref_repo(global_hash, {
sites: [site_name]
})[site_name]
property_keys = get_property_keys(site_properties)
# Exports a description of OAR ressources as a bunch of "self contained" OAR
# commands. Basically it does the following:
# (0) * It adds an header containing helper functions and detects the next
# CPU and CORE IDs that could be used by new OAR resources
# (cf "generate_oar_commands_header()")
# (0) * It creates OAR properties if they don't already exist (cf "generate_oar_property_creation()")
# (1) * It iterates over nodes contained in the 'generated_hierarchy' hash-table
# (2) > * It iterates over the oar resources of the node
# (3) > * If the resource already exists, the CPU and CORE associated to the resource is detected
# (4) * The resource is exported as an OAR command
# (5) * If applicable, create/update the storage devices associated to the node
def export_rows_as_oar_command(generated_hierarchy, site_name, site_properties, data_hierarchy)
result = ""
# Generate helper functions and detect the next available CPU and CORE IDs for
# non exisiting resources
result += generate_oar_commands_header()
# Ensure that OAR properties exist before creating/updating OAR resources
result += generate_oar_property_creation(site_name, data_hierarchy)
# Iterate over nodes of the generated resource hierarchy
generated_hierarchy[:nodes].each do |node|
# Generate OAR commands for creating properties
commands += generate_create_oar_property_cmd(property_keys)
commands
end
result += %Q{
# Exports a description of OAR ressources as a bunch of "self contained" OAR
# commands. Basically it does the following:
# (0) * It adds an header containing helper functions and detects the next
# CPU and CORE IDs that could be used by new OAR resources
# (cf "generate_oar_commands_header()")
# (0) * It creates OAR properties if they don't already exist (cf "generate_oar_property_creation()")
# (1) * It iterates over nodes contained in the 'generated_hierarchy' hash-table
# (2) > * It iterates over the oar resources of the node
# (3) > * If the resource already exists, the CPU and CORE associated to the resource is detected
# (4) * The resource is exported as an OAR command
# (5) * If applicable, create/update the storage devices associated to the node
def export_rows_as_oar_command(generated_hierarchy, site_name, site_properties, data_hierarchy)
result = ''
# Generate helper functions and detect the next available CPU and CORE IDs for
# non exisiting resources
result += generate_oar_commands_header
# Ensure that OAR properties exist before creating/updating OAR resources
result += generate_oar_property_creation(site_name, data_hierarchy)
# Iterate over nodes of the generated resource hierarchy
generated_hierarchy[:nodes].each do |node|
result += %(
###################################
# #{node[:fqdn]}
###################################
}
# Iterate over the resources of the OAR node
node[:oar_rows].each do |oar_ressource_row|
# host = oar_ressource_row[:host].to_s
host = oar_ressource_row[:fqdn].to_s
cpu = oar_ressource_row[:cpu].to_s
core = oar_ressource_row[:core].to_s
cpuset = oar_ressource_row[:cpuset].to_s
gpu = oar_ressource_row[:gpu].to_s
gpudevice = oar_ressource_row[:gpudevice].to_s
gpumodel = oar_ressource_row[:gpumodel].to_s
gpudevicepath = oar_ressource_row[:gpudevicepath].to_s
resource_id = oar_ressource_row[:resource_id]
if resource_id == -1 or resource_id.nil?
# Add the resource to the OAR DB
if gpu == ''
result += "oarnodesetting -a -h '#{host}' -s Absent -p host='#{host}' -p cpu=#{cpu} -p core=#{core} -p cpuset=#{cpuset}\n"
else
result += "oarnodesetting -a -h '#{host}' -s Absent -p host='#{host}' -p cpu=#{cpu} -p core=#{core} -p cpuset=#{cpuset} -p gpu=#{gpu} -p gpu_model='#{gpumodel}' -p gpudevice=#{gpudevice} # This GPU is mapped on #{gpudevicepath}\n"
end
else
# Update the resource
if gpu == ''
)
# Iterate over the resources of the OAR node
node[:oar_rows].each do |oar_ressource_row|
# host = oar_ressource_row[:host].to_s
host = oar_ressource_row[:fqdn].to_s
cpu = oar_ressource_row[:cpu].to_s
core = oar_ressource_row[:core].to_s
cpuset = oar_ressource_row[:cpuset].to_s
gpu = oar_ressource_row[:gpu].to_s
gpudevice = oar_ressource_row[:gpudevice].to_s
gpumodel = oar_ressource_row[:gpumodel].to_s
gpudevicepath = oar_ressource_row[:gpudevicepath].to_s
resource_id = oar_ressource_row[:resource_id]
if resource_id == -1 or resource_id.nil?
# Add the resource to the OAR DB
if gpu == ''
result += "oarnodesetting -a -h '#{host}' -s Absent -p host='#{host}' -p cpu=#{cpu} -p core=#{core} -p cpuset=#{cpuset}\n"
else
result += "oarnodesetting -a -h '#{host}' -s Absent -p host='#{host}' -p cpu=#{cpu} -p core=#{core} -p cpuset=#{cpuset} -p gpu=#{gpu} -p gpu_model='#{gpumodel}' -p gpudevice=#{gpudevice} # This GPU is mapped on #{gpudevicepath}\n"
end
elsif gpu == ''
# Update the resource
result += "oarnodesetting --sql \"host='#{host}' AND resource_id='#{resource_id}' AND type='default'\" -p cpu=#{cpu} -p core=#{core} -p cpuset=#{cpuset}\n"
else
result += "oarnodesetting --sql \"host='#{host}' AND resource_id='#{resource_id}' AND type='default'\" -p cpu=#{cpu} -p core=#{core} -p cpuset=#{cpuset} -p gpu=#{gpu} -p gpu_model='#{gpumodel}' -p gpudevice=#{gpudevice} # This GPU is mapped on #{gpudevicepath}\n"
end
end
end
# Set the OAR properties of the OAR node
result += generate_set_node_properties_cmd(node[:fqdn], node[:default_description])
# Iterate over storage devices
node[:description]["storage_devices"].select{|v| v.key?("reservation") and v["reservation"]}.each do |storage_device|
# As <storage_device> is an Array, where the first element is the device id (i.e. disk0, disk1, ...),
# and the second element is a dictionnary containing information about the storage device,
# thus two variables are created:
# - <storage_device_name> : variable that contains the device id (disk0, disk1, ...)
# - <storage_device_name_with_hostname> : variable that will be the ID of the storage. It follows this
# pattern : "disk1.ecotype-48"
storage_device_name = storage_device["id"]
storage_device_name_with_hostname = "#{storage_device_name}.#{node[:name]}"
# Retrieve the site property that corresponds to this storage device
storage_device_oar_properties_tuple = site_properties["disk"].select { |keys| keys.include?(storage_device_name_with_hostname) }.first
if storage_device_oar_properties_tuple.nil? or storage_device_oar_properties_tuple.size < 2
raise "Error: could not find a site properties for disk #{storage_device_name_with_hostname}"
end
storage_device_oar_properties = storage_device_oar_properties_tuple[1]
# Set the OAR properties of the OAR node
result += generate_set_node_properties_cmd(node[:fqdn], node[:default_description])
# Iterate over storage devices
node[:description]['storage_devices'].select do |v|
v.key?('reservation') and v['reservation']
end.each do |storage_device|
# As <storage_device> is an Array, where the first element is the device id (i.e. disk0, disk1, ...),
# and the second element is a dictionnary containing information about the storage device,
# thus two variables are created:
# - <storage_device_name> : variable that contains the device id (disk0, disk1, ...)
# - <storage_device_name_with_hostname> : variable that will be the ID of the storage. It follows this
# pattern : "disk1.ecotype-48"
storage_device_name = storage_device['id']
storage_device_name_with_hostname = "#{storage_device_name}.#{node[:name]}"
# Retrieve the site property that corresponds to this storage device
storage_device_oar_properties_tuple = site_properties['disk'].select do |keys|
keys.include?(storage_device_name_with_hostname)
end.first
if storage_device_oar_properties_tuple.nil? or storage_device_oar_properties_tuple.size < 2
raise "Error: could not find a site properties for disk #{storage_device_name_with_hostname}"
end
storage_device_oar_properties = storage_device_oar_properties_tuple[1]
result += generate_separators()
result += generate_separators
# Ensure that the storage device exists
result += generate_create_disk_cmd(node[:fqdn], storage_device_name_with_hostname)
# Ensure that the storage device exists
result += generate_create_disk_cmd(node[:fqdn], storage_device_name_with_hostname)
# Set the OAR properties associated to this storage device
result += generate_set_disk_properties_cmd(node[:fqdn], storage_device_name_with_hostname,
storage_device_oar_properties)
end
# Set the OAR properties associated to this storage device
result += generate_set_disk_properties_cmd(node[:fqdn], storage_device_name_with_hostname, storage_device_oar_properties)
result += generate_separators
end
result += generate_separators()
result
end
return result
end
############################################
# Functions related to the "DIFF" operation
############################################
############################################
# Functions related to the "DIFF" operation
############################################
# Splits a node FQDN such as "ecotype-48.nantes.grid5000.fr" into its
# identifier parts and returns them as a string-keyed hash.
def get_ids(host)
  node_uid, site_uid, grid_uid, _tld = host.split('.')
  cluster_uid, node_num = node_uid.split('-')
  {
    'node_uid' => node_uid,
    'site_uid' => site_uid,
    'grid_uid' => grid_uid,
    'cluster_uid' => cluster_uid,
    'node_num' => node_num
  }
end
# Splits a node FQDN such as "ecotype-48.nantes.grid5000.fr" into its
# identifier parts and returns them as a string-keyed hash.
def get_ids(host)
  node_uid, site_uid, grid_uid, _tdl = host.split('.')
  cluster_uid, node_num = node_uid.split('-')
  # The hash literal is the return value; no intermediate variable or
  # explicit return needed.
  { 'node_uid' => node_uid, 'site_uid' => site_uid, 'grid_uid' => grid_uid,
    'cluster_uid' => cluster_uid, 'node_num' => node_num }
end
# Get all node properties of a given site from the reference repo hash
# See also: https://www.grid5000.fr/w/Reference_Repository
def get_ref_default_properties(_site_uid, site)
properties = {}
site.fetch('clusters', {}).each do |cluster_uid, cluster|
cluster['nodes'].each do |node_uid, node|
begin
# Get all node properties of a given site from the reference repo hash.
# See also: https://www.grid5000.fr/w/Reference_Repository
# Returns { node_uid => properties_hash }; nodes with missing properties
# are reported on stdout and skipped, any other error is re-raised.
def get_ref_default_properties(_site_uid, site)
  properties = {}
  site.fetch('clusters', {}).each do |cluster_uid, cluster|
    cluster['nodes'].each do |node_uid, node|
      properties[node_uid] = get_ref_node_properties_internal(cluster_uid, cluster, node_uid, node)
    rescue MissingProperty => e
      puts "Error (missing property) while processing node #{node_uid}: #{e}"
    rescue StandardError => e
      # Dump the offending node description before re-raising, to help debugging.
      puts "FATAL ERROR while processing node #{node_uid}: #{e}"
      puts 'Description of the node is:'
      pp node
      raise
    end
  end
  properties
end
return properties
end
def get_ref_disk_properties(site_uid, site)
properties = {}
site.fetch('clusters', {}).each do |cluster_uid, cluster|
cluster['nodes'].each do |node_uid, node|
begin
# Collects the OAR properties of every reservable disk of the site.
# Nodes with missing properties are reported on stdout and skipped.
def get_ref_disk_properties(site_uid, site)
  properties = {}
  site.fetch('clusters', {}).each do |cluster_uid, cluster|
    cluster['nodes'].each do |node_uid, node|
      begin
        node_disks = get_ref_disk_properties_internal(site_uid, cluster_uid, cluster, node_uid, node)
        properties.merge!(node_disks)
      rescue MissingProperty => e
        puts "Error while processing node #{node_uid}: #{e}"
      end
    end
  end
  properties
end
return properties
end
# Generates the properties of a single node
def get_ref_node_properties_internal(cluster_uid, cluster, node_uid, node)
h = {}
main_network_adapter = node['network_adapters'].find { |na| /^eth[0-9]*$/.match(na['device']) && na['enabled'] && na['mounted'] && !na['management'] }
raise MissingProperty, "Node #{node_uid} does not have a main network_adapter (ie. an ethernet interface with enabled=true && mounted==true && management==false)" unless main_network_adapter
h['ip'] = main_network_adapter['ip']
raise MissingProperty, "Node #{node_uid} has no IP" unless h['ip']
h['cluster'] = cluster_uid
h['nodemodel'] = cluster['model']
h['switch'] = main_network_adapter['switch']
h['besteffort'] = node['supported_job_types']['besteffort']
h['deploy'] = node['supported_job_types']['deploy']
h['virtual'] = node['supported_job_types']['virtual']
h['cpuarch'] = node['architecture']['platform_type']
h['cpucore'] = node['architecture']['nb_cores'] / node['architecture']['nb_procs']
h['cpu_count'] = node['architecture']['nb_procs']
h['core_count'] = node['architecture']['nb_cores']
h['thread_count'] = node['architecture']['nb_threads']
h['cputype'] = [node['processor']['model'], node['processor']['version']].join(' ')
h['cpufreq'] = node['processor']['clock_speed'] / 1_000_000_000.0
h['disktype'] = [node['storage_devices'].first['interface'], node['storage_devices'].first['storage']].join('/')
h['chassis'] = [node['chassis']['manufacturer'], node['chassis']['name'], node['chassis']['serial']].join(' ')
# ETH
ni_mountable = node['network_adapters'].select { |na| /^eth[0-9]*$/.match(na['device']) && (na['enabled'] == true && (na['mounted'] == true || na['mountable'] == true)) }
ni_fastest = ni_mountable.max_by { |na| na['rate'] || 0 }
h['eth_count'] = ni_mountable.length
h['eth_kavlan_count'] = ni_mountable.select { |na| na['kavlan'] == true }.length
h['eth_rate'] = ni_fastest['rate'] / 1_000_000_000
puts "#{node_uid}: Warning - no rate info for the eth interface" if h['eth_count'] > 0 && h['eth_rate'] == 0
# INFINIBAND
ni_mountable = node['network_adapters'].select { |na| /^ib[0-9]*(\.[0-9]*)?$/.match(na['device']) && (na['interface'] == 'InfiniBand' and na['enabled'] == true && (na['mounted'] == true || na['mountable'] == true)) }
ni_fastest = ni_mountable.max_by { |na| na['rate'] || 0 }
# https://en.wikipedia.org/wiki/InfiniBand
ib_map = { 0 => 'NO', 10 => 'SDR', 20 => 'DDR', 40 => 'QDR', 56 => 'FDR', 100 => 'EDR', 200 => 'HDR' }
h['ib_count'] = ni_mountable.length
h['ib_rate'] = ni_mountable.length > 0 ? ni_fastest['rate'] / 1_000_000_000 : 0
h['ib'] = ib_map[h['ib_rate']]
puts "#{node_uid}: Warning - unkwnon ib kind for rate #{h['ib_rate']}, update ib_map variable" if not ib_map.key?(h['ib_rate'])
puts "#{node_uid}: Warning - no rate info for the ib interface" if h['ib_count'] > 0 && h['ib_rate'] == 0
# OMNIPATH
ni_mountable = node['network_adapters'].select { |na| /^ib[0-9]*(\.[0-9]*)?$/.match(na['device']) && (na['interface'] == 'Omni-Path' and na['enabled'] == true && (na['mounted'] == true || na['mountable'] == true)) }
ni_fastest = ni_mountable.max_by { |na| na['rate'] || 0 }
h['opa_count'] = ni_mountable.length
h['opa_rate'] = ni_mountable.length > 0 ? ni_fastest['rate'] / 1_000_000_000 : 0
puts "#{node_uid}: Warning - no rate info for the opa interface" if h['opa_count'] > 0 && h['opa_rate'] == 0
# MYRINET
ni_mountable = node['network_adapters'].select { |na| /^myri[0-9]*$/.match(na['device']) && (na['enabled'] == true && (na['mounted'] == true || na['mountable'] == true)) }
ni_fastest = ni_mountable.max_by { |na| na['rate'] || 0 }
myri_map = { 0 => 'NO', 2 => 'Myrinet-2000', 10 => 'Myri-10G' }
h['myri_count'] = ni_mountable.length
h['myri_rate'] = ni_mountable.length > 0 ? ni_fastest['rate'] / 1_000_000_000 : 0
h['myri'] = myri_map[h['myri_rate']]
puts "#{node_uid}: Warning - no rate info for the myri interface" if h['myri_count'] > 0 && h['myri_rate'] == 0
h['memcore'] = node['main_memory']['ram_size'] / node['architecture']['nb_cores']/MiB
h['memcpu'] = node['main_memory']['ram_size'] / node['architecture']['nb_procs']/MiB
h['memnode'] = node['main_memory']['ram_size'] / MiB
h['gpu_model'] = ''
h['gpu_count'] = 0
h['gpu_mem'] = 0
h['gpu_compute_capability'] = 'N/A'
h['gpu_compute_capability_major'] = 0
if node.key?('gpu_devices')
models = node['gpu_devices'].map { |_, g| g['model'] }.uniq
if models.length > 1
raise "Node #{h['uid']} has more than one model of GPU"
# Generates the properties of a single node
def get_ref_node_properties_internal(cluster_uid, cluster, node_uid, node)
h = {}
main_network_adapter = node['network_adapters'].find do |na|
/^eth[0-9]*$/.match(na['device']) && na['enabled'] && na['mounted'] && !na['management']
end
unless main_network_adapter
raise MissingProperty,
"Node #{node_uid} does not have a main network_adapter (ie. an ethernet interface with enabled=true && mounted==true && management==false)"
end
h['ip'] = main_network_adapter['ip']
raise MissingProperty, "Node #{node_uid} has no IP" unless h['ip']
h['cluster'] = cluster_uid
h['nodemodel'] = cluster['model']
h['switch'] = main_network_adapter['switch']
h['besteffort'] = node['supported_job_types']['besteffort']
h['deploy'] = node['supported_job_types']['deploy']
h['virtual'] = node['supported_job_types']['virtual']
h['cpuarch'] = node['architecture']['platform_type']
h['cpucore'] = node['architecture']['nb_cores'] / node['architecture']['nb_procs']
h['cpu_count'] = node['architecture']['nb_procs']
h['core_count'] = node['architecture']['nb_cores']
h['thread_count'] = node['architecture']['nb_threads']
h['cputype'] = [node['processor']['model'], node['processor']['version']].join(' ')
h['cpufreq'] = node['processor']['clock_speed'] / 1_000_000_000.0
h['disktype'] = [node['storage_devices'].first['interface'], node['storage_devices'].first['storage']].join('/')
h['chassis'] = [node['chassis']['manufacturer'], node['chassis']['name'], node['chassis']['serial']].join(' ')
# ETH
ni_mountable = node['network_adapters'].select do |na|
/^eth[0-9]*$/.match(na['device']) && (na['enabled'] == true && (na['mounted'] == true || na['mountable'] == true))
end
ni_fastest = ni_mountable.max_by { |na| na['rate'] || 0 }
h['eth_count'] = ni_mountable.length
h['eth_kavlan_count'] = ni_mountable.select { |na| na['kavlan'] == true }.length
h['eth_rate'] = ni_fastest['rate'] / 1_000_000_000
puts "#{node_uid}: Warning - no rate info for the eth interface" if h['eth_count'] > 0 && h['eth_rate'] == 0
# INFINIBAND
ni_mountable = node['network_adapters'].select do |na|
/^ib[0-9]*(\.[0-9]*)?$/.match(na['device']) && (na['interface'] == 'InfiniBand' and na['enabled'] == true && (na['mounted'] == true || na['mountable'] == true))
end
ni_fastest = ni_mountable.max_by { |na| na['rate'] || 0 }
# https://en.wikipedia.org/wiki/InfiniBand
ib_map = { 0 => 'NO', 10 => 'SDR', 20 => 'DDR', 40 => 'QDR', 56 => 'FDR', 100 => 'EDR', 200 => 'HDR' }
h['ib_count'] = ni_mountable.length
h['ib_rate'] = ni_mountable.length > 0 ? ni_fastest['rate'] / 1_000_000_000 : 0
h['ib'] = ib_map[h['ib_rate']]
unless ib_map.key?(h['ib_rate'])
puts "#{node_uid}: Warning - unkwnon ib kind for rate #{h['ib_rate']}, update ib_map variable"
end
device = node['gpu_devices'].first[1]
if GPURef.is_gpu_supported?(device)
h['gpu_model'] = device['model']
h['gpu_count'] = node['gpu_devices'].length
h['gpu_mem'] = device['memory'] / MiB
if not ['AMD'].include?(device['vendor']) # not supported on AMD GPUs
h['gpu_compute_capability'] = device['compute_capability']
h['gpu_compute_capability_major'] = device['compute_capability'].split('.').first.to_i
puts "#{node_uid}: Warning - no rate info for the ib interface" if h['ib_count'] > 0 && h['ib_rate'] == 0
# OMNIPATH
ni_mountable = node['network_adapters'].select do |na|
/^ib[0-9]*(\.[0-9]*)?$/.match(na['device']) && (na['interface'] == 'Omni-Path' and na['enabled'] == true && (na['mounted'] == true || na['mountable'] == true))
end
ni_fastest = ni_mountable.max_by { |na| na['rate'] || 0 }
h['opa_count'] = ni_mountable.length
h['opa_rate'] = ni_mountable.length > 0 ? ni_fastest['rate'] / 1_000_000_000 : 0
puts "#{node_uid}: Warning - no rate info for the opa interface" if h['opa_count'] > 0 && h['opa_rate'] == 0
# MYRINET
ni_mountable = node['network_adapters'].select do |na|
/^myri[0-9]*$/.match(na['device']) && (na['enabled'] == true && (na['mounted'] == true || na['mountable'] == true))
end
ni_fastest = ni_mountable.max_by { |na| na['rate'] || 0 }
myri_map = { 0 => 'NO', 2 => 'Myrinet-2000', 10 => 'Myri-10G' }
h['myri_count'] = ni_mountable.length
h['myri_rate'] = ni_mountable.length > 0 ? ni_fastest['rate'] / 1_000_000_000 : 0
h['myri'] = myri_map[h['myri_rate']]
puts "#{node_uid}: Warning - no rate info for the myri interface" if h['myri_count'] > 0 && h['myri_rate'] == 0
h['memcore'] = node['main_memory']['ram_size'] / node['architecture']['nb_cores'] / MiB
h['memcpu'] = node['main_memory']['ram_size'] / node['architecture']['nb_procs'] / MiB
h['memnode'] = node['main_memory']['ram_size'] / MiB
h['gpu_model'] = ''
h['gpu_count'] = 0
h['gpu_mem'] = 0
h['gpu_compute_capability'] = 'N/A'
h['gpu_compute_capability_major'] = 0
if node.key?('gpu_devices')
models = node['gpu_devices'].map { |_, g| g['model'] }.uniq
raise "Node #{h['uid']} has more than one model of GPU" if models.length > 1
device = node['gpu_devices'].first[1]
if GPURef.is_gpu_supported?(device)
h['gpu_model'] = device['model']
h['gpu_count'] = node['gpu_devices'].length
h['gpu_mem'] = device['memory'] / MiB
unless ['AMD'].include?(device['vendor']) # not supported on AMD GPUs
h['gpu_compute_capability'] = device['compute_capability']
h['gpu_compute_capability_major'] = device['compute_capability'].split('.').first.to_i
end
end
end
end
if node.key?('exotic')
h['exotic'] = node['exotic']
else
h['exotic'] = false
end
h['exotic'] = if node.key?('exotic')
node['exotic']
else
false
end
h['mic'] = if node['mic']
'YES'
else
'NO'
end
h['wattmeter'] = if cluster.fetch('metrics', []).any? do |metric|
metric['name'].match(/wattmetre_power_watt|pdu_outlet_power_watt/)
end
'YES'
else
'NO'
end
h['cluster_priority'] = (cluster['priority'] || Time.parse(cluster['created_at'].to_s).strftime('%Y%m')).to_i
h['max_walltime'] = 0 # default
if node['supported_job_types'] && node['supported_job_types'].has_key?('max_walltime')
h['max_walltime'] =
node['supported_job_types']['max_walltime']
end
h['mic'] = if node['mic']
'YES'
else
'NO'
end
h['production'] = get_production_property(node)
h['maintenance'] = get_maintenance_property(node)
h['wattmeter'] = cluster.fetch('metrics', []).any?{|metric| metric['name'].match(/wattmetre_power_watt|pdu_outlet_power_watt/)} ? "YES" : "NO"
# Disk reservation
h['disk_reservation_count'] = node['storage_devices'].select { |v| v['reservation'] }.length
h['cluster_priority'] = (cluster['priority'] || Time.parse(cluster['created_at'].to_s).strftime('%Y%m')).to_i
# convert booleans to YES/NO string
h.each do |k, v|
if v == true
h[k] = 'YES'
elsif v == false
h[k] = 'NO'
elsif v.is_a? Float
h[k] = v.to_s
end
end
h['max_walltime'] = 0 # default
h['max_walltime'] = node['supported_job_types']['max_walltime'] if node['supported_job_types'] && node['supported_job_types'].has_key?('max_walltime')
h
end
h['production'] = get_production_property(node)
h['maintenance'] = get_maintenance_property(node)
# Returns 'YES' when the node's supported job type queues include
# 'production', 'NO' otherwise (including when queues are absent).
def get_production_property(node)
  job_types = node['supported_job_types']
  in_production = job_types && job_types.has_key?('queues') &&
                  job_types['queues'].include?('production')
  in_production ? 'YES' : 'NO'
end
# Disk reservation
h['disk_reservation_count'] = node['storage_devices'].select { |v| v['reservation'] }.length
# Map a node's supported job types to the OAR 'maintenance' property.
# @param node [Hash] node description from the reference repository
# @return [String] 'YES' if the node's queues include 'testing', 'NO' otherwise
#   (also 'NO' when 'supported_job_types' or 'queues' is absent or nil)
def get_maintenance_property(node)
  queues = node['supported_job_types'] && node['supported_job_types']['queues']
  # queues may be nil: default to 'NO' rather than crashing on include?
  queues&.include?('testing') ? 'YES' : 'NO'
end
# convert booleans to YES/NO string
h.each do |k, v|
if v == true
h[k] = 'YES'
elsif v == false
h[k] = 'NO'
elsif v.is_a? Float
h[k] = v.to_s
# Return a list of properties as a hash: { property1 => String, property2 => Integer, ... }
# The type of each property (Integer/String) is detected from the existing values.
def get_property_keys(properties)
  properties.inject({}) do |keys, (type, type_properties)|
    keys.merge(get_property_keys_internal(type, type_properties))
  end
end
# Serialize a hash of OAR properties into a "k=v -p k=v ..." fragment for
# oarnodesetting command lines. Booleans become YES/NO, values are wrapped
# in single quotes, nil values and keys from ignore_default_keys are skipped.
def properties_internal(properties)
  properties
    .to_a
    .reject { |k, _v| ignore_default_keys.include?(k) }
    .map do |(k, v)|
      v = 'YES' if v == true
      v = 'NO' if v == false
      # Use the block form of gsub: a plain "\\'" replacement string is
      # interpreted by gsub as the post-match reference ($'), not as a
      # literal backslash-quote.
      v.nil? ? nil : "#{k}=#{v.inspect.gsub("'") { "\\'" }.gsub('"', "'")}"
    end.compact.join(' -p ')
end
return h
end
# Map a node's supported job types to the OAR 'production' property.
# @param node [Hash] node description from the reference repository
# @return [String] 'YES' if the node's queues include 'production', 'NO' otherwise
#   (also 'NO' when 'supported_job_types' or 'queues' is absent or nil)
def get_production_property(node)
  queues = node['supported_job_types'] && node['supported_job_types']['queues']
  # queues may be nil: default to 'NO' rather than crashing on include?
  queues&.include?('production') ? 'YES' : 'NO'
end
# Map a node's supported job types to the OAR 'maintenance' property.
# @param node [Hash] node description from the reference repository
# @return [String] 'YES' if the node's queues include 'testing', 'NO' otherwise
#   (also 'NO' when 'supported_job_types' or 'queues' is absent or nil)
def get_maintenance_property(node)
  queues = node['supported_job_types'] && node['supported_job_types']['queues']
  # queues may be nil: default to 'NO' rather than crashing on include?
  queues&.include?('testing') ? 'YES' : 'NO'
end
# Return a list of properties as a hash: { property1 => String, property2 => Integer, ... }
# We detect the type of the property (Integer/String) by looking at the existing values
def get_property_keys(properties)
properties_keys = {}
properties.each do |type, type_properties|
properties_keys.merge!(get_property_keys_internal(type, type_properties))
# Serialize disk properties into a "k=v -p k=v ..." fragment for
# oarnodesetting command lines. nil and empty-string values are dropped,
# booleans become YES/NO, values are wrapped in single quotes.
def disk_properties_internal(properties)
  properties
    .to_a
    .reject { |_k, v| v.nil? || v == '' }
    .map do |(k, v)|
      v = 'YES' if v == true
      v = 'NO' if v == false
      # Use the block form of gsub: a plain "\\'" replacement string is
      # interpreted by gsub as the post-match reference ($'), not as a
      # literal backslash-quote.
      "#{k}=#{v.inspect.gsub("'") { "\\'" }.gsub('"', "'")}"
    end.join(' -p ')
end
return properties_keys
end
# Serialize a hash of OAR properties into a "k=v -p k=v ..." fragment for
# oarnodesetting command lines. Booleans become YES/NO, values are wrapped
# in single quotes, nil values and keys from ignore_default_keys are skipped.
def properties_internal(properties)
  properties
    .to_a
    .reject { |k, _v| ignore_default_keys.include?(k) }
    .map do |(k, v)|
      v = 'YES' if v == true
      v = 'NO' if v == false
      # Use the block form of gsub: a plain "\\'" replacement string is
      # interpreted by gsub as the post-match reference ($'), not as a
      # literal backslash-quote.
      v.nil? ? nil : "#{k}=#{v.inspect.gsub("'") { "\\'" }.gsub('"', "'")}"
    end.compact.join(' -p ')
end
# Serialize disk properties into a "k=v -p k=v ..." fragment for
# oarnodesetting command lines. nil and empty-string values are dropped,
# booleans become YES/NO, values are wrapped in single quotes.
def disk_properties_internal(properties)
  properties
    .to_a
    .reject { |_k, v| v.nil? || v == '' }
    .map do |(k, v)|
      v = 'YES' if v == true
      v = 'NO' if v == false
      # Use the block form of gsub: a plain "\\'" replacement string is
      # interpreted by gsub as the post-match reference ($'), not as a
      # literal backslash-quote.
      "#{k}=#{v.inspect.gsub("'") { "\\'" }.gsub('"', "'")}"
    end.join(' -p ')
end
# Returns the expected properties of the reservable disks. These
# properties are then compared with the values in OAR database, to
# generate a diff.
# The key is of the form [node, disk]. In the following example
# we list the different disks (from disk1 to disk5) of node grimoire-1.
# {["grimoire-1", "disk1.grimoire-1"]=>
# {"cluster"=>"grimoire",
# "host"=>"grimoire-1.nancy.grid5000.fr",
# "network_address"=>"",
# "available_upto"=>0,
# "deploy"=>"YES",
# "production"=>"NO",
# "maintenance"=>"NO",
# "disk"=>"disk1.grimoire-1",
# "diskpath"=>"/dev/disk/by-path/pci-0000:02:00.0-scsi-0:0:1:0",
# "cpuset"=>-1},
# ["grimoire-1", "disk2.grimoire-1"]=> ...
def get_ref_disk_properties_internal(site_uid, cluster_uid, cluster, node_uid, node)
properties = {}
node['storage_devices'].each_with_index do |device, index|
disk = [device['id'], node_uid].join('.')
if index > 0 && device['reservation'] # index > 0 is used to exclude disk0
# Returns the expected properties of the reservable disks. These
# properties are then compared with the values in OAR database, to
# generate a diff.
# The key is of the form [node, disk]. In the following example
# we list the different disks (from disk1 to disk5) of node grimoire-1.
# {["grimoire-1", "disk1.grimoire-1"]=>
# {"cluster"=>"grimoire",
# "host"=>"grimoire-1.nancy.grid5000.fr",
# "network_address"=>"",
# "available_upto"=>0,
# "deploy"=>"YES",
# "production"=>"NO",
# "maintenance"=>"NO",
# "disk"=>"disk1.grimoire-1",
# "diskpath"=>"/dev/disk/by-path/pci-0000:02:00.0-scsi-0:0:1:0",
# "cpuset"=>-1},
# ["grimoire-1", "disk2.grimoire-1"]=> ...
def get_ref_disk_properties_internal(site_uid, cluster_uid, cluster, node_uid, node)
properties = {}
node['storage_devices'].each_with_index do |device, index|
disk = [device['id'], node_uid].join('.')
next unless index > 0 && device['reservation'] # index > 0 is used to exclude disk0
key = [node_uid, disk]
# Start by copying the properties of the resource of type default,
# because to reserve both the resources of type disk and of type default
......@@ -570,892 +582,874 @@ def get_ref_disk_properties_internal(site_uid, cluster_uid, cluster, node_uid, n
h['cpuset'] = -1
properties[key] = h
end
properties
end
properties
end
# Fetch the OAR resources of type 'default' for a site.
# @return [Hash] node_uid => resource hash
def get_oar_default_properties(site_uid, options)
  resources = get_oar_data(site_uid, options)
  defaults = resources.select { |r| r['type'] == 'default' }
  defaults.map { |r| [get_ids(r['host'])['node_uid'], r] }.to_h
end
# Fetch the OAR resources of type 'disk' for a site.
# @return [Hash] [node_uid, disk_id] => resource hash
def get_oar_disk_properties(site_uid, options)
  resources = get_oar_data(site_uid, options)
  disks = resources.select { |r| r['type'] == 'disk' }
  disks.map { |r| [[get_ids(r['host'])['node_uid'], r['disk']], r] }.to_h
end
# Get all data from the OAR database
def get_oar_data(site_uid, options)
data_key = [site_uid,options].to_s
# is data already in cache ?
if @@oar_data[data_key].nil?
# If no API URL is given, set a default URL on https://api.grid5000.fr
if not options[:api][:uri]
options[:api][:uri] = "https://api.grid5000.fr"
end
# Preparing the URL that will be used to fetch OAR resources
if options[:api][:uri].include? "api.grid5000.fr"
api_uri = URI.parse('https://api.grid5000.fr/stable/sites/' + site_uid + '/internal/oarapi/resources/details.json?limit=999999')
elsif options[:api][:uri].include? "api-ext.grid5000.fr"
api_uri = URI.parse('https://api-ext.grid5000.fr/stable/sites/' + site_uid + '/internal/oarapi/resources/details.json?limit=999999')
else
api_uri = URI.parse(options[:api][:uri]+'/oarapi/resources/details.json?limit=999999')
end
# Fetch the OAR resources of type 'default' for a site.
# @return [Hash] node_uid => resource hash
def get_oar_default_properties(site_uid, options)
  get_oar_data(site_uid, options)
    .select { |r| r['type'] == 'default' }
    .each_with_object({}) { |r, acc| acc[get_ids(r['host'])['node_uid']] = r }
end
# Download the OAR properties from the OAR API (through G5K API)
http = Net::HTTP.new(api_uri.host, api_uri.port)
if api_uri.scheme == "https"
http.use_ssl = true
end
request = Net::HTTP::Get.new(api_uri.request_uri, {'User-Agent' => 'reference-repository/gen/oar-properties'})
# Fetch the OAR resources of type 'disk' for a site.
# @return [Hash] [node_uid, disk_id] => resource hash
def get_oar_disk_properties(site_uid, options)
  get_oar_data(site_uid, options)
    .select { |r| r['type'] == 'disk' }
    .each_with_object({}) do |r, acc|
      acc[[get_ids(r['host'])['node_uid'], r['disk']]] = r
    end
end
# For outside g5k network access
if options[:api][:user] && options[:api][:pwd]
request.basic_auth(options[:api][:user], options[:api][:pwd])
end
# Get all data from the OAR database
def get_oar_data(site_uid, options)
data_key = [site_uid, options].to_s
# is data already in cache ?
if @@oar_data[data_key].nil?
# If no API URL is given, set a default URL on https://api.grid5000.fr
options[:api][:uri] = 'https://api.grid5000.fr' unless options[:api][:uri]
# Preparing the URL that will be used to fetch OAR resources
if options[:api][:uri].include? 'api.grid5000.fr'
api_uri = URI.parse('https://api.grid5000.fr/stable/sites/' + site_uid + '/internal/oarapi/resources/details.json?limit=999999')
elsif options[:api][:uri].include? 'api-ext.grid5000.fr'
api_uri = URI.parse('https://api-ext.grid5000.fr/stable/sites/' + site_uid + '/internal/oarapi/resources/details.json?limit=999999')
else
api_uri = URI.parse(options[:api][:uri] + '/oarapi/resources/details.json?limit=999999')
end
response = http.request(request)
raise "Failed to fetch resources properties from API: \n#{response.body}\n" unless response.code.to_i == 200
# Download the OAR properties from the OAR API (through G5K API)
http = Net::HTTP.new(api_uri.host, api_uri.port)
http.use_ssl = true if api_uri.scheme == 'https'
request = Net::HTTP::Get.new(api_uri.request_uri, { 'User-Agent' => 'reference-repository/gen/oar-properties' })
oarnodes = JSON.parse(response.body)
# For outside g5k network access
request.basic_auth(options[:api][:user], options[:api][:pwd]) if options[:api][:user] && options[:api][:pwd]
# Adapt from the format of the OAR API
oarnodes = oarnodes['items'] if oarnodes.key?('items')
@@oar_data[data_key] = oarnodes
end
# Return a deep copy of the cached value as it will be modified in place
return Marshal.load(Marshal.dump(@@oar_data[data_key]))
end
def get_property_keys_internal(_type, type_properties)
properties_keys = {}
type_properties.each do |key, node_properties|
# Differentiate between 'default' type (key = node_uid)
# and 'disk' type (key = [node_uid, disk_id])
node_uid, = key
next if node_uid.nil?
node_properties.each do |k, v|
next if properties_keys.key?(k)
next if NilClass === v
# also skip detection if 'v == false' because it seems that if a varchar property
# only has 'NO' values, it might be interpreted as a boolean
# (see the ib property at nantes: ib: NO in the YAML instead of ib: 'NO')
next if v == false
properties_keys[k] = v.class
response = http.request(request)
raise "Failed to fetch resources properties from API: \n#{response.body}\n" unless response.code.to_i == 200
oarnodes = JSON.parse(response.body)
# Adapt from the format of the OAR API
oarnodes = oarnodes['items'] if oarnodes.key?('items')
@@oar_data[data_key] = oarnodes
end
# Return a deep copy of the cached value as it will be modified in place
Marshal.load(Marshal.dump(@@oar_data[data_key]))
end
return properties_keys
end
# Compute the Hashdiff between the properties currently in the OAR database
# and the properties generated from the reference repository, after removing
# the keys that are not managed by the generator.
# @param type [String] 'default' or 'disk'. NOTE(review): any other value
#   leaves the ignore_keys local unassigned (nil), so the .each below would
#   raise NoMethodError — confirm callers only pass these two values.
# @param properties_oar [Hash, nil] properties read from the OAR server
# @param properties_ref [Hash, nil] properties built from the ref-repo
# @return [Array] Hashdiff entries: ['-', k, v], ['+', k, v] or ['~', k, old, new]
# Side effect: ignored keys are deleted in place from BOTH input hashes.
def diff_properties(type, properties_oar, properties_ref)
  properties_oar ||= {}
  properties_ref ||= {}
  if type == 'default'
    ignore_keys = ignore_keys()
  elsif type == 'disk'
    check_keys = %w(cluster host network_address available_upto deploy production maintenance disk diskpath cpuset)
    ignore_keys = ignore_keys() - check_keys # Some keys must be ignored for default but not for disks, ex: available_upto
  end
  ignore_keys.each { |key| properties_oar.delete(key) }
  ignore_keys.each { |key| properties_ref.delete(key) }
  return Hashdiff.diff(properties_oar, properties_ref)
end
# These keys are neither created nor compared when using the -d option.
# ignore_default_keys only applies to resources of type 'default'.
def ignore_default_keys
  # For reference, the default OAR values at resource creation:
  #   available_upto: '2147483647', besteffort: 'YES', core: ~, cpu: ~,
  #   cpuset: 0, deploy: 'NO', desktop_computing: 'NO', drain: 'NO',
  #   expiry_date: 0, finaud_decision: 'YES', host: ~,
  #   last_available_upto: 0, last_job_date: 0, network_address: server,
  #   next_finaud_decision: 'NO', next_state: UnChanged, resource_id: 9,
  #   scheduler_priority: 0, state: Suspected, state_num: 3,
  #   suspended_jobs: 'NO', type: default
  subnet_keys = (16..22).map { |width| "slash_#{width}" }
  subnet_keys + [
    'available_upto',
    'chunks',
    'comment', # TODO
    'core',      # created by 'oar_resources_add'
    'cpu',       # created by 'oar_resources_add'
    'host',      # created by 'oar_resources_add'
    'gpudevice', # new property handled by the new generator
    'gpu',       # new property handled by the new generator
    'cpuset',
    'desktop_computing',
    'drain',
    'expiry_date',
    'finaud_decision',
    'grub',
    'jobs', # present while a job is running
    'last_available_upto',
    'last_job_date',
    'network_address', # TODO
    'next_finaud_decision',
    'next_state',
    'rconsole', # TODO
    'resource_id',
    'scheduler_priority',
    'state',
    'state_num',
    'subnet_address',
    'subnet_prefix',
    'suspended_jobs',
    'thread',
    'type', # TODO
    'vlan',
    'pdu',
    'id',            # id from API (= resource_id from oarnodes)
    'api_timestamp', # from API
    'links'          # from API
  ]
end
# Properties of resources of type 'disk' to ignore (for example, when
# comparing resources of type 'default' with the -d option).
def ignore_disk_keys
  %w[disk diskpath]
end
# Union of the 'default' and 'disk' ignore lists.
def ignore_keys
  ignore_default_keys + ignore_disk_keys
end
# Properties such as deploy and besteffort, that should not be created
# (they are managed by OAR itself).
def oar_system_keys
  %w[deploy besteffort]
end
# Fetch the raw OAR resource list for every requested site.
# @return [Hash] site_uid => { 'resources' => [...] }
def get_oar_resources_from_oar(options)
  options.fetch(:sites).each_with_object({}) do |site_uid, acc|
    acc[site_uid] = { 'resources' => get_oar_data(site_uid, options) }
  end
end
# sudo exec
# Run a batch of shell commands as root on the remote host via SSH:
# the commands are piped to a remote 'sudo bash' on its stdin
# (equivalent to: cat cmds | ssh host sudo bash).
# @param cmds [String] newline-separated shell commands
# @param options [Hash] expects options[:ssh][:host] and options[:ssh][:user]
# @param verbose [Boolean] echo the remote stdout/stderr locally when true
# @return [String] combined stdout and stderr of the remote execution
def run_commands_via_ssh(cmds, options, verbose=true)
  # The following is equivalent to : "cat cmds | bash"
  res = ""
  c = Net::SSH.start(options[:ssh][:host], options[:ssh][:user])
  c.open_channel { |channel|
    channel.exec('sudo bash') { |_ch, _success|
      # stdout
      channel.on_data { |_ch2, data|
        if verbose
          puts data
        end
        res += data
      }
      # stderr
      channel.on_extended_data do |_ch2, _type, data|
        if verbose
          puts data
        end
        res += data
      end
      # feed the commands to the remote shell's stdin, then signal EOF
      channel.send_data cmds
      channel.eof!
    }
  }
  c.loop # process the SSH event loop until the channel is closed
  c.close
  return res
end
# Get the properties of each node.
# Build, per requested site, the 'default' and 'disk' OAR properties
# expected from the reference repository.
def get_oar_properties_from_the_ref_repo(global_hash, options)
  result = {}
  options.fetch(:sites).each do |site_uid|
    site = global_hash.fetch('sites').fetch(site_uid)
    result[site_uid] = {
      'default' => get_ref_default_properties(site_uid, site),
      'disk' => get_ref_disk_properties(site_uid, site)
    }
  end
  result
end
def get_oar_properties_from_oar(options)
properties = {}
options.fetch(:sites).each do |site_uid|
properties[site_uid] = {
'default' => get_oar_default_properties(site_uid, options),
'disk' => get_oar_disk_properties(site_uid, options)
}
# Infer the type (String/Integer/...) of each property from the existing
# values of a set of resources.
# @param _type [String] resource type ('default' or 'disk'), unused here
# @param type_properties [Hash] key => {property => value}; key is a node_uid
#   for 'default' resources, or [node_uid, disk_id] for 'disk' resources
# @return [Hash] property name => class of the first non-nil, non-false value
def get_property_keys_internal(_type, type_properties)
  properties_keys = {}
  type_properties.each do |key, node_properties|
    # Differentiate between 'default' type (key = node_uid)
    # and 'disk' type (key = [node_uid, disk_id])
    node_uid, = key
    next if node_uid.nil?
    node_properties.each do |k, v|
      next if properties_keys.key?(k)
      next if v.nil? # idiomatic nil check instead of is_a?(NilClass)
      # also skip detection if 'v == false' because it seems that if a varchar property
      # only has 'NO' values, it might be interpreted as a boolean
      # (see the ib property at nantes: ib: NO in the YAML instead of ib: 'NO')
      next if v == false
      properties_keys[k] = v.class
    end
  end
  properties_keys
end
properties
end
def do_diff(options, generated_hierarchy, refrepo_properties)
raise if options[:update] or options[:print] # no longer supported, was never really supported
def diff_properties(type, properties_oar, properties_ref)
properties_oar ||= {}
properties_ref ||= {}
ret = 0
if type == 'default'
ignore_keys = ignore_keys()
elsif type == 'disk'
check_keys = %w[cluster host network_address available_upto deploy production maintenance disk diskpath cpuset]
ignore_keys = ignore_keys() - check_keys # Some key must be ignored for default but not for disks, ex: available_upto
end
ignore_keys.each { |key| properties_oar.delete(key) }
ignore_keys.each { |key| properties_ref.delete(key) }
diagnostic_msgs = []
Hashdiff.diff(properties_oar, properties_ref)
end
properties = {
'ref' => refrepo_properties,
'oar' => get_oar_properties_from_oar(options)
}
# These keys are neither created nor compared when using the -d option.
# ignore_default_keys is only applied to resources of type 'default'.
def ignore_default_keys
  # For reference, the default OAR values at resource creation:
  # available_upto: '2147483647'
  # besteffort: 'YES'
  # core: ~
  # cpu: ~
  # cpuset: 0
  # deploy: 'NO'
  # desktop_computing: 'NO'
  # drain: 'NO'
  # expiry_date: 0
  # finaud_decision: 'YES'
  # host: ~
  # last_available_upto: 0
  # last_job_date: 0
  # network_address: server
  # next_finaud_decision: 'NO'
  # next_state: UnChanged
  # resource_id: 9
  # scheduler_priority: 0
  # state: Suspected
  # state_num: 3
  # suspended_jobs: 'NO'
  # type: default
  [
    'slash_16',
    'slash_17',
    'slash_18',
    'slash_19',
    'slash_20',
    'slash_21',
    'slash_22',
    'available_upto',
    'chunks',
    'comment', # TODO
    'core', # This property was created by 'oar_resources_add'
    'cpu', # This property was created by 'oar_resources_add'
    'host', # This property was created by 'oar_resources_add'
    'gpudevice', # New property taken into account by the new generator
    'gpu', # New property taken into account by the new generator
    'cpuset',
    'desktop_computing',
    'drain',
    'expiry_date',
    'finaud_decision',
    'grub',
    'jobs', # This property exists when a job is running
    'last_available_upto',
    'last_job_date',
    'network_address', # TODO
    'next_finaud_decision',
    'next_state',
    'rconsole', # TODO
    'resource_id',
    'scheduler_priority',
    'state',
    'state_num',
    'subnet_address',
    'subnet_prefix',
    'suspended_jobs',
    'thread',
    'type', # TODO
    'vlan',
    'pdu',
    'id', # id from API (= resource_id from oarnodes)
    'api_timestamp', # from API
    'links' # from API
  ]
end
# Get the list of property keys from the reference-repo (['ref'])
properties_keys = {
'ref' => {},
'oar' => {},
'diff' => {}
}
# Properties of resources of type 'disk' to ignore (for example, when
# comparing resources of type 'default' with the -d option).
def ignore_disk_keys
  ['disk', 'diskpath']
end
options[:sites].each do |site_uid|
properties_keys['ref'][site_uid] = get_property_keys(properties['ref'][site_uid])
# Union of the 'default' and 'disk' ignore lists.
def ignore_keys
  [ignore_default_keys, ignore_disk_keys].inject(:+)
end
ignore_default_keys = ignore_default_keys()
# Build the list of nodes that are listed in properties['oar'],
# but does not exist in properties['ref']
missings_alive = []
missings_dead = []
properties['oar'].each do |site_uid, site_properties|
site_properties['default'].each_filtered_node_uid(options[:clusters], options[:nodes]) do |node_uid, node_properties_oar|
unless properties['ref'][site_uid]['default'][node_uid]
if node_properties_oar['state'] != 'Dead'
missings_alive << node_uid
else
missings_dead << node_uid
# Properties such as deploy and besteffort, that should not be created
# (they are managed by OAR itself).
def oar_system_keys
  ['deploy', 'besteffort']
end
# Fetch the raw OAR resource list for every requested site.
# @return [Hash] site_uid => { 'resources' => [...] }
def get_oar_resources_from_oar(options)
  options.fetch(:sites).map do |site_uid|
    [site_uid, { 'resources' => get_oar_data(site_uid, options) }]
  end.to_h
end
# sudo exec
# Run a batch of shell commands as root on the remote host via SSH:
# the commands are piped to a remote 'sudo bash' on its stdin
# (equivalent to: cat cmds | ssh host sudo bash).
# @param cmds [String] newline-separated shell commands
# @param options [Hash] expects options[:ssh][:host] and options[:ssh][:user]
# @param verbose [Boolean] echo the remote stdout/stderr locally when true
# @return [String] combined stdout and stderr of the remote execution
def run_commands_via_ssh(cmds, options, verbose = true)
  # The following is equivalent to : "cat cmds | bash"
  res = ''
  c = Net::SSH.start(options[:ssh][:host], options[:ssh][:user])
  c.open_channel do |channel|
    channel.exec('sudo bash') do |_ch, _success|
      # stdout
      channel.on_data do |_ch2, data|
        puts data if verbose
        res += data
      end
      # stderr
      channel.on_extended_data do |_ch2, _type, data|
        puts data if verbose
        res += data
      end
      # feed the commands to the remote shell's stdin, then signal EOF
      channel.send_data cmds
      channel.eof!
    end
  end
  c.loop # process the SSH event loop until the channel is closed
  c.close
  res
end
if missings_alive.size > 0
diagnostic_msgs.push("*** Error: The following nodes exist in the OAR server but are missing in the reference-repo: #{missings_alive.join(', ')}.\n")
ret = false
# Get the properties of each node.
# Build, per requested site, the 'default' and 'disk' OAR properties
# expected from the reference repository.
def get_oar_properties_from_the_ref_repo(global_hash, options)
  options.fetch(:sites).map do |site_uid|
    site = global_hash.fetch('sites').fetch(site_uid)
    [site_uid, {
      'default' => get_ref_default_properties(site_uid, site),
      'disk' => get_ref_disk_properties(site_uid, site)
    }]
  end.to_h
end
prev_diff = {}
properties['diff'] = {}
# Fetch, per requested site, the 'default' and 'disk' properties currently
# stored in the OAR database.
def get_oar_properties_from_oar(options)
  options.fetch(:sites).each_with_object({}) do |site_uid, acc|
    acc[site_uid] = {
      'default' => get_oar_default_properties(site_uid, options),
      'disk' => get_oar_disk_properties(site_uid, options)
    }
  end
end
header = false
properties['ref'].each do |site_uid, site_properties|
properties['diff'][site_uid] = {}
site_properties.each do |type, type_properties|
properties['diff'][site_uid][type] = {}
type_properties.each_filtered_node_uid(options[:clusters], options[:nodes]) do |key, properties_ref|
# As an example, key can be equal to 'grimoire-1' for default resources or
# ['grimoire-1', 1] for disk resources (disk n°1 of grimoire-1)
def do_diff(options, generated_hierarchy, refrepo_properties)
raise if options[:update] or options[:print] # no longer supported, was never really supported
properties_oar = properties['oar'][site_uid][type][key]
ret = 0
diff = diff_properties(type, properties_oar, properties_ref) # Note: this deletes some properties from the input parameters
diff_keys = diff.map { |hashdiff_array| hashdiff_array[1] }
properties['diff'][site_uid][type][key] = properties_ref.select { |k, _v| diff_keys.include?(k) }
diagnostic_msgs = []
if properties['oar'][site_uid][type][key].nil?
info = ((type == 'default') ? ' new node !' : ' new disk !')
else
info = ''
end
properties = {
'ref' => refrepo_properties,
'oar' => get_oar_properties_from_oar(options)
}
if header == false
# output header only once
diagnostic_msgs.push( "Output format: [ '-', 'key', 'value'] for missing, [ '+', 'key', 'value'] for added, ['~', 'key', 'old value', 'new value'] for changed")
header = true
end
if diff.empty?
diagnostic_msgs.push( " #{key}: OK#{info}")
elsif diff == prev_diff
diagnostic_msgs.push( " #{key}:#{info} same modifications as above")
else
diagnostic_msgs.push( " #{key}:#{info}")
diff.each { |d| diagnostic_msgs.push( " #{d}") }
end
prev_diff = diff
if diff.size != 0
ret = false
# Get the list of property keys from the reference-repo (['ref'])
properties_keys = {
'ref' => {},
'oar' => {},
'diff' => {}
}
options[:sites].each do |site_uid|
properties_keys['ref'][site_uid] = get_property_keys(properties['ref'][site_uid])
end
ignore_default_keys = ignore_default_keys()
# Build the list of nodes that are listed in properties['oar'],
# but does not exist in properties['ref']
missings_alive = []
missings_dead = []
properties['oar'].each do |site_uid, site_properties|
site_properties['default'].each_filtered_node_uid(options[:clusters],
options[:nodes]) do |node_uid, node_properties_oar|
unless properties['ref'][site_uid]['default'][node_uid]
if node_properties_oar['state'] != 'Dead'
missings_alive << node_uid
else
missings_dead << node_uid
end
end
end
end
# Get the list of property keys from the OAR scheduler (['oar'])
properties_keys['oar'][site_uid] = get_property_keys(properties['oar'][site_uid])
if missings_alive.size > 0
diagnostic_msgs.push("*** Error: The following nodes exist in the OAR server but are missing in the reference-repo: #{missings_alive.join(', ')}.\n")
ret = false
end
prev_diff = {}
properties['diff'] = {}
header = false
properties['ref'].each do |site_uid, site_properties|
properties['diff'][site_uid] = {}
site_properties.each do |type, type_properties|
properties['diff'][site_uid][type] = {}
type_properties.each_filtered_node_uid(options[:clusters], options[:nodes]) do |key, properties_ref|
# As an example, key can be equal to 'grimoire-1' for default resources or
# ['grimoire-1', 1] for disk resources (disk n°1 of grimoire-1)
properties_oar = properties['oar'][site_uid][type][key]
diff = diff_properties(type, properties_oar, properties_ref) # NOTE: this deletes some properties from the input parameters
diff_keys = diff.map { |hashdiff_array| hashdiff_array[1] }
properties['diff'][site_uid][type][key] = properties_ref.select { |k, _v| diff_keys.include?(k) }
info = if properties['oar'][site_uid][type][key].nil?
(type == 'default' ? ' new node !' : ' new disk !')
else
''
end
if header == false
# output header only once
diagnostic_msgs.push("Output format: [ '-', 'key', 'value'] for missing, [ '+', 'key', 'value'] for added, ['~', 'key', 'old value', 'new value'] for changed")
header = true
end
if diff.empty?
diagnostic_msgs.push(" #{key}: OK#{info}")
elsif diff == prev_diff
diagnostic_msgs.push(" #{key}:#{info} same modifications as above")
else
diagnostic_msgs.push(" #{key}:#{info}")
diff.each { |d| diagnostic_msgs.push(" #{d}") }
end
prev_diff = diff
ret = false if diff.size != 0
end
end
# Get the list of property keys from the OAR scheduler (['oar'])
properties_keys['oar'][site_uid] = get_property_keys(properties['oar'][site_uid])
# Build the list of properties that must be created in the OAR server
properties_keys['diff'][site_uid] = {}
properties_keys['ref'][site_uid].each do |k, v_ref|
v_oar = properties_keys['oar'][site_uid][k]
properties_keys['diff'][site_uid][k] = v_ref unless v_oar
next unless v_oar && v_oar != v_ref && v_ref != NilClass && v_oar != NilClass
# Build the list of properties that must be created in the OAR server
properties_keys['diff'][site_uid] = {}
properties_keys['ref'][site_uid].each do |k, v_ref|
v_oar = properties_keys['oar'][site_uid][k]
properties_keys['diff'][site_uid][k] = v_ref unless v_oar
if v_oar && v_oar != v_ref && v_ref != NilClass && v_oar != NilClass
# Detect inconsistency between the type (String/Integer) of properties generated by this script and the existing values on the server.
diagnostic_msgs.push( "Error: the OAR property '#{k}' is a '#{v_oar}' on the #{site_uid} server and this script uses '#{v_ref}' for this property.")
diagnostic_msgs.push("Error: the OAR property '#{k}' is a '#{v_oar}' on the #{site_uid} server and this script uses '#{v_ref}' for this property.")
ret = false
end
end
diagnostic_msgs.push( "Properties that need to be created on the #{site_uid} server: #{properties_keys['diff'][site_uid].keys.to_a.delete_if { |e| ignore_keys.include?(e) }.join(', ')}") if properties_keys['diff'][site_uid].keys.to_a.delete_if { |e| ignore_keys.include?(e) }.size > 0
# Detect unknown properties
unknown_properties = properties_keys['oar'][site_uid].keys.to_set - properties_keys['ref'][site_uid].keys.to_set
ignore_default_keys.each do |key|
unknown_properties.delete(key)
end
if properties_keys['diff'][site_uid].keys.to_a.delete_if do |e|
ignore_keys.include?(e)
end.size > 0
diagnostic_msgs.push("Properties that need to be created on the #{site_uid} server: #{properties_keys['diff'][site_uid].keys.to_a.delete_if do |e|
ignore_keys.include?(e)
end.join(', ')}")
end
if unknown_properties.size > 0
diagnostic_msgs.push( "Properties existing on the #{site_uid} server but not managed/known by the generator: #{unknown_properties.to_a.join(', ')}.")
diagnostic_msgs.push( "Hint: you can delete properties with 'oarproperty -d <property>' or add them to the ignore list in lib/lib-oar-properties.rb.")
ret = false
end
# Detect unknown properties
unknown_properties = properties_keys['oar'][site_uid].keys.to_set - properties_keys['ref'][site_uid].keys.to_set
ignore_default_keys.each do |key|
unknown_properties.delete(key)
end
if not options[:print]
diagnostic_msgs.map{|msg| puts(msg)}
end
if unknown_properties.size > 0
diagnostic_msgs.push("Properties existing on the #{site_uid} server but not managed/known by the generator: #{unknown_properties.to_a.join(', ')}.")
diagnostic_msgs.push("Hint: you can delete properties with 'oarproperty -d <property>' or add them to the ignore list in lib/lib-oar-properties.rb.")
ret = false
end
# Check that CPUSETs on the OAR server are consistent with what would have been generated
oar_resources = get_oar_resources_from_oar(options)
diagnostic_msgs.map { |msg| puts(msg) } unless options[:print]
error_msgs = ""
# Check that CPUSETs on the OAR server are consistent with what would have been generated
oar_resources = get_oar_resources_from_oar(options)
options[:clusters].each do |cluster|
error_msgs = ''
generated_rows_for_this_cluster = generated_hierarchy[:nodes]
.map{|node| node[:oar_rows]}
.flatten
.select{|r| r[:cluster] == cluster}
options[:clusters].each do |cluster|
generated_rows_for_this_cluster = generated_hierarchy[:nodes]
.map { |node| node[:oar_rows] }
.flatten
.select { |r| r[:cluster] == cluster }
site_resources = oar_resources[site_uid]["resources"]
cluster_resources = site_resources.select{|x| x["cluster"] == cluster}
default_cluster_resources = cluster_resources.select{|r| r["type"] == "default"}
site_resources = oar_resources[site_uid]['resources']
cluster_resources = site_resources.select { |x| x['cluster'] == cluster }
default_cluster_resources = cluster_resources.select { |r| r['type'] == 'default' }
next unless generated_rows_for_this_cluster.length > 0
if generated_rows_for_this_cluster.length > 0
# Check that OAR resources are associated with the right cpu, core and cpuset
generated_rows_for_this_cluster.each do |row|
corresponding_resource = default_cluster_resources.select{|r| r["id"] == row[:resource_id]}
corresponding_resource = default_cluster_resources.select { |r| r['id'] == row[:resource_id] }
if corresponding_resource.length > 0
resc = corresponding_resource[0]
{:cpu => "cpu", :core => "core", :cpuset => "cpuset", :gpu => "gpu", :gpudevice => "gpudevice"}.each do |key, value|
if row[key].to_s != corresponding_resource[0][value].to_s and not (key == :gpu and row[key].nil? and corresponding_resource[0][value] == 0)
expected_value = row[key]
if expected_value == "" or expected_value.nil?
expected_value = "null"
end
diagnostic_msg = <<-TXT
# Error: Resource #{resc["id"]} (host=#{resc["network_address"]} cpu=#{resc["cpu"]} core=#{resc["core"]} cpuset=#{resc["cpuset"]} gpu=#{resc["gpu"]} gpudevice=#{resc["gpudevice"]}) has a mismatch for ressource #{value.upcase}: OAR API gives #{resc[value]}, generator wants #{expected_value}.
TXT
error_msgs += "#{diagnostic_msg}"
{ cpu: 'cpu', core: 'core', cpuset: 'cpuset', gpu: 'gpu',
gpudevice: 'gpudevice' }.each do |key, value|
unless row[key].to_s != corresponding_resource[0][value].to_s and !(key == :gpu and row[key].nil? and corresponding_resource[0][value] == 0)
next
end
expected_value = row[key]
expected_value = 'null' if expected_value == '' or expected_value.nil?
diagnostic_msg = <<~TXT
# Error: Resource #{resc['id']} (host=#{resc['network_address']} cpu=#{resc['cpu']} core=#{resc['core']} cpuset=#{resc['cpuset']} gpu=#{resc['gpu']} gpudevice=#{resc['gpudevice']}) has a mismatch for ressource #{value.upcase}: OAR API gives #{resc[value]}, generator wants #{expected_value}.
TXT
error_msgs += "#{diagnostic_msg}"
end
else
elsif row[:resource_id] != -1
# If resource_id is not equal to -1, then the generator is working on a resource that should exist,
# however it cannot be found : the generator reports an error to the operator
if row[:resource_id] != -1
puts "Error: could not find ressource with ID=#{row[:resource_id]}"
end
puts "Error: could not find ressource with ID=#{row[:resource_id]}"
end
end
end
end
if not options[:print] and not error_msgs.empty?
puts error_msgs
ret = false
if !(options[:print]) and !error_msgs.empty?
puts error_msgs
ret = false
end
end
ret
end
return ret
end
def sanity_check(cluster_resources, site_resources)
# Detect cluster resources
cluster_cpus = cluster_resources.map { |r| r['cpu'] }.uniq
cluster_gpus = cluster_resources.map { |r| r['gpu'] }.select { |gpu| !gpu.nil? }.uniq
cluster_cores = cluster_resources.map { |r| r['core'] }.uniq
def sanity_check(cluster_resources, site_resources)
# Check CPUs
cluster_cpus.each do |cluster_cpu|
hosts_with_same_cpu = site_resources.select { |r| r['cpu'] == cluster_cpu }.map { |r| r['host'] }.uniq
# Detect cluster resources
cluster_cpus = cluster_resources.map{|r| r["cpu"]}.uniq
cluster_gpus = cluster_resources.map{|r| r["gpu"]}.select{|gpu| not gpu.nil?}.uniq
cluster_cores = cluster_resources.map{|r| r["core"]}.uniq
next unless hosts_with_same_cpu.length > 1
# Check CPUs
cluster_cpus.each do |cluster_cpu|
hosts_with_same_cpu = site_resources.select{|r| r["cpu"] == cluster_cpu}.map{|r| r["host"]}.uniq
if hosts_with_same_cpu.length > 1
puts("################################")
puts('################################')
puts("# Error: CPU #{cluster_cpu} is associated to more than one host: #{hosts_with_same_cpu}.")
puts("# You can review this situation via the following command:\n")
puts("################################")
puts('################################')
puts("oarnodes -Y --sql \"cpu=#{cluster_cpu}\"")
puts("")
puts('')
return false
end
end
# Checks GPUs
cluster_gpus.each do |cluster_gpu|
hosts_with_same_gpu = site_resources.select{|r| r["gpu"] == cluster_gpu}.map{|r| r["host"]}.uniq
# Checks GPUs
cluster_gpus.each do |cluster_gpu|
hosts_with_same_gpu = site_resources.select { |r| r['gpu'] == cluster_gpu }.map { |r| r['host'] }.uniq
if hosts_with_same_gpu.length > 1
puts("################################")
next unless hosts_with_same_gpu.length > 1
puts('################################')
puts("# Error: GPU #{cluster_gpu} is associated to more than one host: #{hosts_with_same_gpu}.")
puts("# You can review this situation via the following command:\n")
puts("################################")
puts('################################')
puts("oarnodes -Y --sql \"gpu=#{cluster_gpu}\"")
puts("")
puts('')
return false
end
end
# Check Cores
cluster_cores.each do |cluster_core|
resources_id_with_same_core = site_resources.select{|r| r["core"] == cluster_core}.map{|r| r["id"]}
# Check Cores
cluster_cores.each do |cluster_core|
resources_id_with_same_core = site_resources.select { |r| r['core'] == cluster_core }.map { |r| r['id'] }
if resources_id_with_same_core.length > 1
oar_sql_clause = resources_id_with_same_core.map{|rid| "resource_id='#{rid}'"}.join(" OR ")
next unless resources_id_with_same_core.length > 1
puts("################################")
oar_sql_clause = resources_id_with_same_core.map { |rid| "resource_id='#{rid}'" }.join(' OR ')
puts('################################')
puts("# Error: resources with ids #{resources_id_with_same_core} have the same value for core (core is equal to #{cluster_core})\n")
puts("# You can review this situation via the following command:\n")
puts("################################")
puts('################################')
puts("oarnodes -Y --sql \"#{oar_sql_clause}\"")
puts("")
puts('')
return false
end
end
return true
end
def extract_clusters_description(clusters, site_name, options, data_hierarchy, site_properties)
# This function works as follow:
# (1) Initialization
# (a) Load the local data contained in JSON data files
# (b) Handle the program arguments (detects which site and which clusters
# are targeted), and what action is requested
# (c) Fetch the OAR properties of the requested site
# (2) Generate an OAR node hierarchy
# (a) Iterate over cluster > nodes > cpus > cores
# (b) Detect existing resource_ids and {CPU, CORE, CPUSET, GPU}'s IDs
# (c) [if cluster with GPU] detects the existing mapping GPU <-> CPU in the cluster
# (d) Associate a cpuset to each core
# (e) [if cluster with GPU] Associate a gpuset to each core
true
end
def extract_clusters_description(clusters, site_name, options, data_hierarchy, site_properties)
# This function works as follow:
# (1) Initialization
# (a) Load the local data contained in JSON data files
# (b) Handle the program arguments (detects which site and which clusters
# are targeted), and what action is requested
# (c) Fetch the OAR properties of the requested site
# (2) Generate an OAR node hierarchy
# (a) Iterate over cluster > nodes > cpus > cores
# (b) Detect existing resource_ids and {CPU, CORE, CPUSET, GPU}'s IDs
# (c) [if cluster with GPU] detects the existing mapping GPU <-> CPU in the cluster
# (d) Associate a cpuset to each core
# (e) [if cluster with GPU] Associate a gpuset to each core
############################################
# (1) Initialization
############################################
oar_resources = get_oar_resources_from_oar(options)
############################################
# (1) Initialization
############################################
generated_hierarchy = {
:nodes => []
}
oar_resources = get_oar_resources_from_oar(options)
############################################
# (2) Generate an OAR node hierarchy
############################################
generated_hierarchy = {
nodes: []
}
site_resources = oar_resources[site_name]["resources"].select{|r| r["type"] == "default"}
############################################
# (2) Generate an OAR node hierarchy
############################################
next_rsc_ids = {
"cpu" => site_resources.length > 0 ? site_resources.map{|r| r["cpu"]}.max : 0,
"core" => site_resources.length > 0 ? site_resources.map{|r| r["core"]}.max : 0,
"gpu" => site_resources.length > 0 ? site_resources.map{|r| r["gpu"]}.select{|x| not x.nil?}.max : 0
}
site_resources = oar_resources[site_name]['resources'].select { |r| r['type'] == 'default' }
# Some existing cluster have GPUs, but no GPU ID has been allocated to them
if next_rsc_ids["gpu"].nil?
next_rsc_ids["gpu"] = 0
end
next_rsc_ids = {
'cpu' => site_resources.length > 0 ? site_resources.map { |r| r['cpu'] }.max : 0,
'core' => site_resources.length > 0 ? site_resources.map { |r| r['core'] }.max : 0,
'gpu' => site_resources.length > 0 ? site_resources.map { |r| r['gpu'] }.select { |x| !x.nil? }.max : 0
}
############################################
# (2-a) Iterate over clusters. (rest: servers, cpus, cores)
############################################
# Some existing cluster have GPUs, but no GPU ID has been allocated to them
next_rsc_ids['gpu'] = 0 if next_rsc_ids['gpu'].nil?
clusters.sort.each do |cluster_name|
############################################
# (2-a) Iterate over clusters. (rest: servers, cpus, cores)
############################################
cpu_idx = 0
core_idx = 0
clusters.sort.each do |cluster_name|
cpu_idx = 0
core_idx = 0
unless data_hierarchy['sites'][site_name]['clusters'].include?(cluster_name)
puts("It seems that the cluster \"#{cluster_name}\" does not exist in the API. The generator will abort.")
raise 'Sanity check failed'
end
unless data_hierarchy['sites'][site_name]['clusters'].include?(cluster_name)
puts("It seems that the cluster \"#{cluster_name}\" does not exist in the API. The generator will abort.")
raise 'Sanity check failed'
end
cluster_nodes = data_hierarchy['sites'][site_name]['clusters'][cluster_name]['nodes']
cluster_nodes = data_hierarchy['sites'][site_name]['clusters'][cluster_name]['nodes']
node_count = cluster_nodes.length
node_count = cluster_nodes.length
cluster_resources = site_resources
.select{|r| r["cluster"] == cluster_name}
.select{|r| cluster_nodes.include?(r["host"].split(".")[0])}
.sort_by{|r| [r["cpu"], r["core"]]}
cluster_resources = site_resources
.select { |r| r['cluster'] == cluster_name }
.select { |r| cluster_nodes.include?(r['host'].split('.')[0]) }
.sort_by { |r| [r['cpu'], r['core']] }
sanity_check_result = sanity_check(cluster_resources, site_resources)
unless sanity_check_result
puts("It seems that the cluster \"#{cluster_name}\" has some incoherence in its resource configuration (see above). The generator will abort.")
raise 'Sanity check failed'
end
sanity_check_result = sanity_check(cluster_resources, site_resources)
unless sanity_check_result
puts("It seems that the cluster \"#{cluster_name}\" has some incoherence in its resource configuration (see above). The generator will abort.")
raise 'Sanity check failed'
end
first_node = cluster_nodes.first[1]
first_node = cluster_nodes.first[1]
cpu_count = first_node['architecture']['nb_procs']
cpu_core_count = first_node['architecture']['nb_cores'] / cpu_count
cpu_thread_count = first_node['architecture']['nb_threads'] / cpu_count
core_thread_count = first_node['architecture']['nb_threads'] / first_node['architecture']['nb_cores']
gpu_count = cluster_nodes.values.map { |e| (e['gpu_devices'] || {} ).length }.max
cpu_count = first_node['architecture']['nb_procs']
cpu_core_count = first_node['architecture']['nb_cores'] / cpu_count
cpu_thread_count = first_node['architecture']['nb_threads'] / cpu_count
core_thread_count = first_node['architecture']['nb_threads'] / first_node['architecture']['nb_cores']
gpu_count = cluster_nodes.values.map { |e| (e['gpu_devices'] || {}).length }.max
cpu_model = "#{first_node['processor']['model']} #{first_node['processor']['version']}"
core_numbering = first_node['architecture']['cpu_core_numbering']
cpu_model = "#{first_node['processor']['model']} #{first_node['processor']['version']}"
core_numbering = first_node['architecture']['cpu_core_numbering']
############################################
# (2-b) Detect existing resource_ids and {CPU, CORE, CPUSET, GPU}'s IDs
############################################
############################################
# (2-b) Detect existing resource_ids and {CPU, CORE, CPUSET, GPU}'s IDs
############################################
# <phys_rsc_map> is a hash that centralises variables that will be used for managing IDs of CPUs, COREs, GPUs of
# the cluster that is going to be updated. <phys_rsc_map> is useful to detect situations where the current number
# of resources associated to a cluster does not correspond to the needs of the cluster.
phys_rsc_map = {
"cpu" => {
:current_ids => [],
:per_server_count => cpu_count,
:per_cluster_count => node_count * cpu_count
# <phys_rsc_map> is a hash that centralises variables that will be used for managing IDs of CPUs, COREs, GPUs of
# the cluster that is going to be updated. <phys_rsc_map> is useful to detect situations where the current number
# of resources associated to a cluster does not correspond to the needs of the cluster.
phys_rsc_map = {
'cpu' => {
current_ids: [],
per_server_count: cpu_count,
per_cluster_count: node_count * cpu_count
},
"core" => {
:current_ids => [],
:per_server_count => cpu_core_count,
:per_cluster_count => node_count * cpu_count * cpu_core_count
'core' => {
current_ids: [],
per_server_count: cpu_core_count,
per_cluster_count: node_count * cpu_count * cpu_core_count
},
"gpu" => {
:current_ids => [],
:per_server_count => gpu_count,
:per_cluster_count => cluster_nodes.values.map { |e| (e['gpu_devices'] || {} ).length }.inject(0) { |a, b| a+b } # sum
},
}
'gpu' => {
current_ids: [],
per_server_count: gpu_count,
per_cluster_count: # sum
cluster_nodes.values.map do |e|
(e['gpu_devices'] || {}).length
end.inject(0) { |a, b| a + b }
}
}
# For each physical ressource, we prepare a list of IDs:
# a) we start with the IDs of the list of existing resources
# b) we add more if needed
phys_rsc_map.each do |physical_resource, variables|
current_ids = cluster_resources.map { |r| r[physical_resource] }.select { |x| !x.nil? }.uniq
if current_ids.length > variables[:per_cluster_count]
raise "#{physical_resource}: too many IDs in OAR compared to what we need to generate. This case needs to be fixed manually."
end
# For each physical ressource, we prepare a list of IDs:
# a) we start with the IDs of the list of existing resources
# b) we add more if needed
phys_rsc_map.each do |physical_resource, variables|
current_ids = cluster_resources.map{|r| r[physical_resource]}.select{|x| not x.nil?}.uniq
if current_ids.length > variables[:per_cluster_count]
raise "#{physical_resource}: too many IDs in OAR compared to what we need to generate. This case needs to be fixed manually."
elsif current_ids.length < variables[:per_cluster_count]
current_ids.length < variables[:per_cluster_count]
# there are less resources currently affected to that cluster than what is needed. affect more and bump next_rsc_ids
needed = variables[:per_cluster_count] - current_ids.length
current_ids += [*next_rsc_ids[physical_resource]+1..next_rsc_ids[physical_resource]+needed]
next_rsc_ids[physical_resource] = variables[:per_server_count] > 0 ? variables[:current_ids].max : next_rsc_ids[physical_resource]
else
# that's OK, nothing to do
end
variables[:current_ids] = current_ids
end
oar_resource_ids = cluster_resources.map{|r| r["id"]}.uniq
if oar_resource_ids.length < phys_rsc_map["core"][:current_ids].length
# the cluster has less resource ids than needed (because resources were added to it)
needed = phys_rsc_map["core"][:current_ids].length - oar_resource_ids.length
# -1 here asks to generate the resources, not change them
oar_resource_ids += [*1..needed].map { -1 }
end
# Some cluster (econome) have attributed resources according to the "alpha-numerical" order of nodes
# ([1, 11, 12, ..., 3] instead of [1, 2, 3, 4, ...]). Here we preserve to order of existing nodes of the cluster
nodes_names = cluster_resources.map{|r| r["host"]}.map{|fqdn| {:fqdn => fqdn, :name => fqdn.split(".")[0]}}.uniq
if nodes_names.length != node_count
# some missing nodes. we regenerate the list from scratch
nodes_names = (1..node_count).map {|i| {:name => "#{cluster_name}-#{i}", :fqdn => "#{cluster_name}-#{i}.#{site_name}.grid5000.fr"}}
end
############################################
# Suite of (2-a): Iterate over nodes of the cluster. (rest: cpus, cores)
############################################
current_ids += [*next_rsc_ids[physical_resource] + 1..next_rsc_ids[physical_resource] + needed]
next_rsc_ids[physical_resource] =
variables[:per_server_count] > 0 ? variables[:current_ids].max : next_rsc_ids[physical_resource]
(1..node_count).each do |node_num|
# node_index0 starts at 0
node_index0 = node_num -1
name = nodes_names[node_index0][:name]
fqdn = nodes_names[node_index0][:fqdn]
node_description = cluster_nodes[name]
node_description_default_properties = site_properties["default"][name]
variables[:current_ids] = current_ids
end
if node_description.nil?
next
oar_resource_ids = cluster_resources.map { |r| r['id'] }.uniq
if oar_resource_ids.length < phys_rsc_map['core'][:current_ids].length
# the cluster has less resource ids than needed (because resources were added to it)
needed = phys_rsc_map['core'][:current_ids].length - oar_resource_ids.length
# -1 here asks to generate the resources, not change them
oar_resource_ids += [*1..needed].map { -1 }
end
generated_node_description = {
:name => name,
:fqdn => fqdn,
:cluster_name => cluster_name,
:site_name => site_name,
:description => node_description,
:oar_rows => [],
:disks => [],
:gpus => (node_description["gpu_devices"] != nil ? (node_description["gpu_devices"].select{|_k ,v| v.fetch("reservation", true)}.length) : 0),
:default_description => node_description_default_properties
}
# Some cluster (econome) have attributed resources according to the "alpha-numerical" order of nodes
# ([1, 11, 12, ..., 3] instead of [1, 2, 3, 4, ...]). Here we preserve to order of existing nodes of the cluster
nodes_names = cluster_resources.map { |r| r['host'] }.map { |fqdn| { fqdn:, name: fqdn.split('.')[0] } }.uniq
if nodes_names.length != node_count
# some missing nodes. we regenerate the list from scratch
nodes_names = (1..node_count).map do |i|
{ name: "#{cluster_name}-#{i}", fqdn: "#{cluster_name}-#{i}.#{site_name}.grid5000.fr" }
end
end
############################################
# Suite of (2-a): Iterate over CPUs of the server. (rest: cores)
# Suite of (2-a): Iterate over nodes of the cluster. (rest: cpus, cores)
############################################
(0...cpu_count).each do |cpu_num|
(1..node_count).each do |node_num|
# node_index0 starts at 0
node_index0 = node_num - 1
name = nodes_names[node_index0][:name]
fqdn = nodes_names[node_index0][:fqdn]
node_description = cluster_nodes[name]
node_description_default_properties = site_properties['default'][name]
next if node_description.nil?
generated_node_description = {
name:,
fqdn:,
cluster_name:,
site_name:,
description: node_description,
oar_rows: [],
disks: [],
gpus: (if !node_description['gpu_devices'].nil?
node_description['gpu_devices'].select do |_k, v|
v.fetch('reservation', true)
end.length
else
0
end),
default_description: node_description_default_properties
}
############################################
# Suite of (2-a): Iterate over CORES of the CPU
# Suite of (2-a): Iterate over CPUs of the server. (rest: cores)
############################################
(0...cpu_core_count).each do |core_num|
# Compute cpu and core ID
oar_resource_id = oar_resource_ids[core_idx]
cpu_id = phys_rsc_map["cpu"][:current_ids][cpu_idx]
core_id = phys_rsc_map["core"][:current_ids][core_idx]
# Prepare an Hash that represents a single OAR resource. Few
# keys are initialized with empty values.
row = {
:cluster => cluster_name,
:host => name,
:cpu => cpu_id,
:core => core_id,
:cpuset => nil,
:gpu => nil,
:gpudevice => nil,
:gpudevicepath => nil,
:cpumodel => nil,
:gpumodel => nil,
:oar_properties => nil,
:fqdn => fqdn,
:resource_id => oar_resource_id,
}
(0...cpu_count).each do |cpu_num|
############################################
# (2-d) Associate a cpuset to each core
# See https://www.grid5000.fr/w/TechTeam:CPU_core_numbering
# Suite of (2-a): Iterate over CORES of the CPU
############################################
if core_numbering == 'contiguous'
row[:cpuset] = cpu_num * cpu_core_count + core_num
elsif core_numbering == 'contiguous-including-threads'
row[:cpuset] = cpu_num * cpu_thread_count + core_num
elsif core_numbering == 'contiguous-grouped-by-threads'
row[:cpuset] = cpu_num * cpu_thread_count + core_num * core_thread_count
elsif core_numbering == 'round-robin'
row[:cpuset] = cpu_num + core_num * cpu_count
else
raise
end
row[:cpumodel] = cpu_model
(0...cpu_core_count).each do |core_num|
# Compute cpu and core ID
oar_resource_id = oar_resource_ids[core_idx]
cpu_id = phys_rsc_map['cpu'][:current_ids][cpu_idx]
core_id = phys_rsc_map['core'][:current_ids][core_idx]
# Prepare an Hash that represents a single OAR resource. Few
# keys are initialized with empty values.
row = {
cluster: cluster_name,
host: name,
cpu: cpu_id,
core: core_id,
cpuset: nil,
gpu: nil,
gpudevice: nil,
gpudevicepath: nil,
cpumodel: nil,
gpumodel: nil,
oar_properties: nil,
fqdn:,
resource_id: oar_resource_id
}
############################################
# (2-d) Associate a cpuset to each core
# See https://www.grid5000.fr/w/TechTeam:CPU_core_numbering
############################################
if core_numbering == 'contiguous'
row[:cpuset] = cpu_num * cpu_core_count + core_num
elsif core_numbering == 'contiguous-including-threads'
row[:cpuset] = cpu_num * cpu_thread_count + core_num
elsif core_numbering == 'contiguous-grouped-by-threads'
row[:cpuset] = cpu_num * cpu_thread_count + core_num * core_thread_count
elsif core_numbering == 'round-robin'
row[:cpuset] = cpu_num + core_num * cpu_count
else
raise
end
############################################
# (2-e) [if cluster with GPU] Associate a gpuset to each core
############################################
row[:cpumodel] = cpu_model
############################################
# (2-e) [if cluster with GPU] Associate a gpuset to each core
############################################
if node_description.key? 'gpu_devices' and node_description['gpu_devices'].values.select do |v|
v.fetch('reservation', true)
end.length > 0
# The node has reservable GPUs
# numa_gpus is the list of gpus for the current CPU
numa_gpus = node_description['gpu_devices'].values.select do |v|
field = if v.key? 'cpu_affinity_override'
'cpu_affinity_override'
else
'cpu_affinity'
end
v[field] == cpu_num and v.fetch('reservation', true)
end
if numa_gpus.empty?
# This core is not associated to any GPU
unless node_description['gpu_devices'].values.select { |v| v.fetch('reservation', true) }.length == 1
raise "#{fqdn}: No GPU to associate to CPU #{cpu_num}, core #{row[:cpuset]}. You probably want to use cpu_affinity_override to affect a GPU to this CPU."
end
if node_description.key? "gpu_devices" and node_description["gpu_devices"].values.select { |v| v.fetch("reservation", true) }.length > 0
# The node has reservable GPUs
# numa_gpus is the list of gpus for the current CPU
numa_gpus = node_description["gpu_devices"].values.select do |v|
field = if v.key? 'cpu_affinity_override'
'cpu_affinity_override'
else
'cpu_affinity'
end
v[field] == cpu_num and v.fetch("reservation", true)
end
if numa_gpus.empty?
# This core is not associated to any GPU
if node_description["gpu_devices"].values.select { |v| v.fetch("reservation", true) }.length == 1
# The node has only one reservable GPU, we affect it to all cores
selected_gpu = node_description["gpu_devices"].values.first
selected_gpu = node_description['gpu_devices'].values.first
elsif numa_gpus.first.key? 'cores_affinity'
# this cluster uses cores_affinity, not arbitrary allocation
selected_gpu = numa_gpus.find do |g|
g['cores_affinity'].split.map do |e|
e.to_i
end.include?(row[:cpuset])
end
raise "Could not find a GPU on CPU #{cpu_num} for core #{row[:cpuset]}" if selected_gpu.nil?
else
raise "#{fqdn}: No GPU to associate to CPU #{cpu_num}, core #{row[:cpuset]}. You probably want to use cpu_affinity_override to affect a GPU to this CPU."
# The parenthesis order is important: we want to keep the
# integer division to generate an integer index, so we want to
# do the multiplication first.
gpu_idx = (core_num * numa_gpus.length) / cpu_core_count
selected_gpu = numa_gpus[gpu_idx]
end
elsif numa_gpus.first.key? 'cores_affinity'
# this cluster uses cores_affinity, not arbitrary allocation
selected_gpu = numa_gpus.find { |g| g['cores_affinity'].split.map { |e| e.to_i }.include?(row[:cpuset]) }
if selected_gpu.nil?
raise "Could not find a GPU on CPU #{cpu_num} for core #{row[:cpuset]}"
end
else
# The parenthesis order is important: we want to keep the
# integer division to generate an integer index, so we want to
# do the multiplication first.
gpu_idx = (core_num * numa_gpus.length) / cpu_core_count
selected_gpu = numa_gpus[gpu_idx]
# id of the selected GPU in the node
local_id = node_description['gpu_devices'].values.index(selected_gpu)
# to assign the gpu number, just use the number of nodes and the number of GPUs per node
# sanity check: we must fall into the correct range
gpu = phys_rsc_map['gpu'][:current_ids].min + node_index0 * gpu_count + local_id
raise "Invalid GPU number for cluster #{cluster_name}" if gpu > phys_rsc_map['gpu'][:current_ids].max
row[:gpu] = gpu
row[:gpudevice] = local_id
row[:gpudevicepath] = selected_gpu['device']
row[:gpumodel] = selected_gpu['model']
end
# id of the selected GPU in the node
local_id = node_description["gpu_devices"].values.index(selected_gpu)
# to assign the gpu number, just use the number of nodes and the number of GPUs per node
# sanity check: we must fall into the correct range
gpu = phys_rsc_map["gpu"][:current_ids].min + node_index0 * gpu_count + local_id
if gpu > phys_rsc_map["gpu"][:current_ids].max
raise "Invalid GPU number for cluster #{cluster_name}"
end
row[:gpu] = gpu
row[:gpudevice] = local_id
row[:gpudevicepath] = selected_gpu['device']
row[:gpumodel] = selected_gpu['model']
end
core_idx += 1
core_idx += 1
generated_node_description[:oar_rows].push(row)
generated_node_description[:oar_rows].push(row)
end
cpu_idx += 1
end
cpu_idx += 1
end
generated_hierarchy[:nodes].push(generated_node_description)
generated_hierarchy[:nodes].push(generated_node_description)
end
end
generated_hierarchy
end
return generated_hierarchy
end
############################################
# MAIN function
############################################
# This function is called from RAKE and is in charge of
# - printing OAR commands to
# > add a new cluster
# > update and existing cluster
# - execute these commands on an OAR server
def generate_oar_properties(options)
############################################
# MAIN function
############################################
# Reset the OAR API cache, because the rpec tests change the data in our back
# while calling multiple times this function.
@@oar_data = {}
# This function is called from RAKE and is in charge of
# - printing OAR commands to
# > add a new cluster
# > update and existing cluster
# - execute these commands on an OAR server
def generate_oar_properties(options)
# Reset the OAR API cache, because the rpec tests change the data in our back
# while calling multiple times this function.
@@oar_data = {}
options[:api] ||= {}
conf = RefRepo::Utils.get_api_config
options[:api][:user] = conf['username']
options[:api][:pwd] = conf['password']
options[:api][:uri] = conf['uri']
options[:ssh] ||= {}
options[:ssh][:user] ||= 'g5kadmin'
options[:ssh][:host] ||= 'oar.%s.g5kadmin'
options[:sites] = [options[:site]] # for compatibility with other generators
options[:api] ||= {}
conf = RefRepo::Utils.get_api_config
options[:api][:user] = conf['username']
options[:api][:pwd] = conf['password']
options[:api][:uri] = conf['uri']
options[:ssh] ||= {}
options[:ssh][:user] ||= 'g5kadmin'
options[:ssh][:host] ||= 'oar.%s.g5kadmin'
options[:sites] = [options[:site]] # for compatibility with other generators
############################################
# Fetch:
# 1) generated data from load_data_hierarchy
# 2) oar properties from the reference repository
############################################
############################################
# Fetch:
# 1) generated data from load_data_hierarchy
# 2) oar properties from the reference repository
############################################
# Load the description from the ref-api (data/ dir)
data_hierarchy = load_data_hierarchy
# Load the description from the ref-api (data/ dir)
data_hierarchy = load_data_hierarchy
# filter based on site/cluster
site_name = options[:site]
# filter based on site/cluster
site_name = options[:site]
# Replace the site placeholder of ssh hosts by the site
options[:ssh][:host] = options[:ssh][:host].gsub('%s', site_name)
# Replace the site placeholder of ssh hosts by the site
options[:ssh][:host] = options[:ssh][:host].gsub('%s', site_name)
# If no cluster is given, then the clusters are the cluster of the given site
if !options.key? :clusters or options[:clusters].length == 0
raise("The provided site does not exist : I can't detect clusters") unless data_hierarchy['sites'].key? site_name
# If no cluster is given, then the clusters are the cluster of the given site
if not options.key? :clusters or options[:clusters].length == 0
if data_hierarchy['sites'].key? site_name
clusters = data_hierarchy['sites'][site_name]['clusters'].keys
options[:clusters] = clusters
else
raise("The provided site does not exist : I can't detect clusters")
clusters = options[:clusters]
end
else
clusters = options[:clusters]
end
# convert to OAR properties
refrepo_properties = get_oar_properties_from_the_ref_repo(data_hierarchy, {
:sites => [site_name]
})
# convert to OAR properties
refrepo_properties = get_oar_properties_from_the_ref_repo(data_hierarchy, {
sites: [site_name]
})
# also fetch the resources hierarchy inside nodes (cores, gpus, etc.)
generated_hierarchy = extract_clusters_description(clusters,
# also fetch the resources hierarchy inside nodes (cores, gpus, etc.)
generated_hierarchy = extract_clusters_description(clusters,
site_name,
options,
data_hierarchy,
refrepo_properties[site_name])
############################################
# Output generated information
############################################
############################################
# Output generated information
############################################
ret = 0
ret = 0
# DO=table
if options.key? :table and options[:table]
display_resources_table(generated_hierarchy)
end
# DO=table
display_resources_table(generated_hierarchy) if options.key? :table and options[:table]
# Do=Diff
if options.key? :diff and options[:diff]
ret = do_diff(options, generated_hierarchy, refrepo_properties)
end
# Do=Diff
ret = do_diff(options, generated_hierarchy, refrepo_properties) if options.key? :diff and options[:diff]
# DO=print
if options.key? :print and options[:print]
cmds = export_rows_as_oar_command(generated_hierarchy, site_name, refrepo_properties[site_name], data_hierarchy)
# DO=print
if options.key? :print and options[:print]
cmds = export_rows_as_oar_command(generated_hierarchy, site_name, refrepo_properties[site_name], data_hierarchy)
puts(cmds)
end
puts(cmds)
end
# Do=update
if options[:update]
printf 'Apply changes to the OAR server ' + options[:ssh][:host].gsub('%s', site_name) + ' ? (y/N) '
prompt = STDIN.gets.chomp
cmds = export_rows_as_oar_command(generated_hierarchy, site_name, refrepo_properties[site_name], data_hierarchy)
run_commands_via_ssh(cmds, options) if prompt.downcase == 'y'
end
# Do=update
if options[:update]
printf 'Apply changes to the OAR server ' + options[:ssh][:host].gsub('%s', site_name) + ' ? (y/N) '
prompt = STDIN.gets.chomp
cmds = export_rows_as_oar_command(generated_hierarchy, site_name, refrepo_properties[site_name], data_hierarchy)
run_commands_via_ssh(cmds, options) if prompt.downcase == 'y'
ret
end
return ret
end
end # Module
include OarProperties
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment