Mentions légales du service

Skip to content
Snippets Groups Projects
oar-properties.rb 5.31 KiB
# coding: utf-8

require 'net/ssh'
require 'hashdiff'

# Properties that are ignored when comparing the list of OAR properties.
# Frozen so the shared constant cannot be mutated by accident.
IGNORED_PROPERTIES = %w[].freeze

# Properties that are expected to be present on every default resource.
# The list is sorted and stripped of IGNORED_PROPERTIES so it can be compared
# directly against the sorted key list of a resource.
G5K_PROPERTIES = (%w[
  api_timestamp available_upto besteffort chassis chunks cluster cluster_priority comment
  core cpu cpuarch cpucore cpufreq cpuset cputype
  deploy desktop_computing disk disk_reservation_count diskpath disktype drain
  eth_count eth_rate expiry_date finaud_decision
  gpu gpudevice gpu_model gpu_count grub
  host ib ib_count ib_rate id ip
  last_available_upto last_job_date links maintenance max_walltime
  memcore memcpu memnode mic myri myri_count myri_rate
  network_address next_finaud_decision next_state nodemodel
  production rconsole scheduler_priority
  slash_16 slash_17 slash_18 slash_19 slash_20 slash_21 slash_22
  state state_num subnet_address subnet_prefix suspended_jobs switch
  type virtual vlan wattmeter opa_count opa_rate
].sort - IGNORED_PROPERTIES).freeze


# Consistency checks on the OAR resources reported by each site's OAR API.
module RefRepo::Valid::OarProperties
  # Fetches the OAR resources of every requested site and validates them:
  # scheduler_priority sign, available_upto of non-default resources, the list
  # of property names, uniqueness of 'core' and 'cpu', and the per-host
  # resource count and core/cpuset numbering. Problems are printed to stdout.
  #
  # @param options [Hash] reads :sites (site names to check), :clusters
  #   (cluster names to restrict to; empty means no restriction) and :verbose
  #   (print details about offending resources)
  # @return [Boolean] true when every check passed, false otherwise
  # @raise [RuntimeError] when the resources of a single host disagree on the
  #   computed number of cores
  def self.check(options)
    ret = true
    options[:sites].each do |site|
      puts "Checking site #{site}..."
      resources = RefRepo::Utils::get_api("sites/#{site}/internal/oarapi/resources/details.json?limit=1000000")['items']

      # Sorted by id: the per-host core ordering check below relies on it.
      default_resources = resources.select { |e| e['type'] == 'default' }.sort_by { |e| e['id'] }
      unless options[:clusters].empty?
        puts "Restricting to resources of clusters #{options[:clusters].join(',')}"
        default_resources.select! { |e| options[:clusters].include?(e['cluster']) }
      end

      # Checking scheduler_priority
      default_resources.each do |r|
        next unless r['scheduler_priority'] < 0

        puts "Invalid scheduler_priority value on #{r['id']}/#{r['network_address']}: #{r['scheduler_priority']}"
        ret = false
      end

      # Non-default resources must have available_upto = 0 (see bug 8062)
      resources.select { |e| e['type'] != 'default' }.sort_by { |e| e['id'] }.each do |r|
        next if r['available_upto'] == 0

        puts "Invalid available_upto value on #{r['id']} (type=#{r['type']}, state=#{r['state']}): #{r['available_upto']} (should be 0)"
        ret = false
      end

      # Nothing left to validate when no default resource matches (for example
      # when the cluster restriction targets a cluster of another site).
      # Without this guard the property-list check would call a method on nil.
      if default_resources.empty?
        puts "No default resources to check on site #{site}, skipping remaining checks"
        next
      end

      # Checking list of properties (only the first distinct key set is compared)
      names = default_resources.map { |e| e.keys.sort }.uniq.first - IGNORED_PROPERTIES
      if names != G5K_PROPERTIES
        puts "ERROR: wrong list of properties:"
        ret = false
        puts "- " + (G5K_PROPERTIES - names).join(' ')
        puts "+ " + (names - G5K_PROPERTIES).join(' ')
      end

      # 'core' must be globally unique
      dupe_cores = default_resources.map { |e| e.slice('id', 'core', 'host', 'cpu', 'cpuset') }
                                    .group_by { |e| e['core'] }.to_a
                                    .select { |e| e[1].length > 1 }
      unless dupe_cores.empty?
        puts "ERROR: some resources have the same 'core' value. it should be globally unique over the site."
        ret = false
        pp dupe_cores if options[:verbose]
      end

      # 'cpu' must be unique to a 'host'
      dupe_cpus = default_resources.map { |e| [e['cpu'], e['host']] }.uniq
                                   .group_by { |e| e[0] }.to_a
                                   .select { |e| e[1].length > 1 }
      unless dupe_cpus.empty?
        puts "ERROR: some hosts have the same 'cpu' value. it should be globally unique over the site."
        ret = false
        # Show the offending cpu/host pairs (not the duplicated cores).
        pp dupe_cpus if options[:verbose]
      end

      # for each host ... (group_by keeps hosts in first-seen order and the
      # resources of each host in id order, in a single pass)
      default_resources.group_by { |e| e['host'] }.each do |host, host_resources|
        cluster = host_resources.first['cluster']
        next if !options[:clusters].empty? && !options[:clusters].include?(cluster)

        # compute nbcores.
        # cpucore is cores per cpu. to know the number of cpus, we divide memnode per memcpu.
        nbcores = host_resources.map { |e| e['cpucore'] * (e['memnode'] / e['memcpu']) }.uniq
        raise "Invalid: varying nbcores inside cluster!" if nbcores.length > 1

        nbcores = nbcores.first

        if host_resources.length != nbcores
          puts "ERROR: invalid number of resources for #{host}. should be nbcores."
          ret = false
        end

        # ids and cores should be in the same order: the list is deliberately
        # not sorted, so first/last are the cores of the lowest/highest id.
        host_cores = host_resources.map { |e| e['core'] }
        cores_sequential = host_cores.last - host_cores.first + 1 == nbcores
        unless cores_sequential
          puts "ERROR: core values for #{host} are not sequential"
          ret = false
        end

        # the first cpuset should be 0
        host_cpusets = host_resources.map { |e| e['cpuset'] }.sort
        if host_cpusets.first != 0
          puts "ERROR: first cpuset value for #{host} should be 0"
          ret = false
        end

        # the last cpuset should be nbcores-1, on aarch64 cpuset are not sequential.
        # The architecture must be tested per resource: comparing the whole
        # array of cpuarch values to a string is never true.
        x86_64 = host_resources.all? { |e| e['cpuarch'] == 'x86_64' }
        cpusets_sequential = host_cpusets.last - host_cpusets.first + 1 == nbcores
        cpusets_invalid = x86_64 && !cpusets_sequential
        if cpusets_invalid
          puts "ERROR: cpuset values for #{host} are not sequential"
          ret = false
        end

        # Dump the host's numbering only when one of the sequence errors above
        # was reported.
        if options[:verbose] && (cpusets_invalid || !cores_sequential)
          puts "id   cpu   core   cpuset"
          pp(host_resources.map { |e| [e['id'], e['cpu'], e['core'], e['cpuset']] })
        end
      end
    end
    ret
  end
end