# coding: utf-8
require 'refrepo/gen/wiki/generators/site_hardware'

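# Generates the grid-wide "Hardware" wiki page: a summary of all Grid'5000
# resources, the per-cluster tables from SiteHardwareGenerator, and totals
# tables (processors, memory, network, storage, accelerators, node models)
# with one column per site.
#
# Typical use, assuming the refrepo wiki generation framework drives
# generators by page name (invocation shown for illustration only):
#   generator = G5KHardwareGenerator.new('Hardware')
#   generator.generate_content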
class G5KHardwareGenerator < WikiGenerator

  def initialize(page_name)
    super(page_name)
  end

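  # Assembles the whole page into @generated_content: header and portal
  # markup, summary, clusters section, grid-wide totals, and the
  # generation date.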
  def generate_content
    @global_hash = get_global_hash
    @site_uids = G5K::SITES

    @generated_content = "__NOEDITSECTION__\n"
    @generated_content += "{{Portal|User}}\n"
    @generated_content += "<div class=\"sitelink\">Hardware: [[Hardware|Global]] | " + G5K::SITES.map { |e| "[[#{e.capitalize}:Hardware|#{e.capitalize}]]" }.join(" | ") + "</div>\n"
    @generated_content += generate_summary
    @generated_content += "\n= Clusters =\n"
    @generated_content += SiteHardwareGenerator.generate_all_clusters
    @generated_content += generate_totals
    @generated_content += MW.italic(MW.small(generated_date_string))
    @generated_content += MW::LINE_FEED
  end

  def generate_summary
    sites = @global_hash['sites'].length
    clusters = 0
    nodes = 0
    cores = 0
    gpus = 0
    hdds = 0
    ssds = 0
    storage_space = 0
    ram = 0
    pmem = 0
    flops = 0

    @global_hash['sites'].sort.to_h.each do |site_uid, site_hash|
      clusters += site_hash['clusters'].length
      site_hash['clusters'].sort.to_h.each do |cluster_uid, cluster_hash|
        cluster_hash['nodes'].sort.to_h.each do |node_uid, node_hash|
          next if node_hash['status'] == 'retired'
          nodes += 1
          cores += node_hash['architecture']['nb_cores']
          ram += node_hash['main_memory']['ram_size']
          pmem += node_hash['main_memory']['pmem_size'] if node_hash['main_memory']['pmem_size']
          if node_hash['gpu_devices']
            gpus += node_hash['gpu_devices'].length
          end
          ssds += node_hash['storage_devices'].count { |d| d['storage'] == 'SSD' }
          hdds += node_hash['storage_devices'].count { |d| d['storage'] == 'HDD' }
          node_hash['storage_devices'].each do |d|
            storage_space += d['size']
          end
          flops += node_hash['performance']['node_flops']
        end
      end
    end
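    # flops was accumulated in raw FLOPS; report it as TFLOPS (10**12)
    # with one decimal.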
    tflops = sprintf("%.1f", flops.to_f / (10**12))
    return <<-EOF
= Summary =
* #{sites} sites
* #{clusters} clusters
* #{nodes} nodes
* #{cores} CPU cores
* #{gpus} GPUs
* #{G5K.get_size(ram)} RAM + #{G5K.get_size(pmem)} PMEM
* #{ssds} SSDs and #{hdds} HDDs on nodes (total: #{G5K.get_size(storage_space, 'metric')})
* #{tflops} TFLOPS (excluding GPUs)
    EOF
  end

  def generate_totals
    data = {
      'proc_families' => {},
      'proc_models' => {},
      'core_models' => {},
      'ram_size' => {},
      'pmem_size' => {},
      'net_interconnects' => {},
      'net_models' => {},
      'nvme_models' => {},
      'acc_families' => {},
      'acc_models' => {},
      'acc_cores' => {},
      'node_models' => {}
    }
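    # Each category maps a row key (an array of cells, each either a plain
    # string or a {text:, sort:} hash) to a per-site counter initialized by
    # init(), e.g. (shape only, not real data):
    #   data['ram_size'][[{text: '64 GB', sort: '000064 GB'}]]['grenoble'] += 1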

    @global_hash['sites'].sort.to_h.each { |site_uid, site_hash|
      site_hash['clusters'].sort.to_h.each { |cluster_uid, cluster_hash|
        cluster_hash['nodes'].sort.to_h.each { |node_uid, node_hash|
          begin
            next if node_hash['status'] == 'retired'
            @node = node_uid

            # Processors
            model = node_hash['processor']['model']
            version = "#{model} #{node_hash['processor']['version']}"
            microarchitecture = node_hash['processor']['microarchitecture']

            cluster_procs = node_hash['architecture']['nb_procs']
            cluster_cores = node_hash['architecture']['nb_cores']

            key = [model]
            init(data, 'proc_families', key)
            data['proc_families'][key][site_uid] += cluster_procs

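            # The sort key is prefixed with the microarchitecture release
            # date (see get_date) so that wiki tables order CPU generations
            # chronologically rather than alphabetically.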
            key = [{text: microarchitecture || ' ', sort: get_date(microarchitecture) + ', ' + microarchitecture.to_s}, {text: version, sort: get_date(microarchitecture) + ', ' + version.to_s}]
            init(data, 'proc_models', key)
            data['proc_models'][key][site_uid] += cluster_procs

            init(data, 'core_models', key)
            data['core_models'][key][site_uid] += cluster_cores

            # RAM size
            ram_size = node_hash['main_memory']['ram_size']
            key = [{ text: G5K.get_size(ram_size), sort: (ram_size / 2**30).to_s.rjust(6, '0') + ' GB' }]
            init(data, 'ram_size', key)
            data['ram_size'][key][site_uid] += 1

            # PMEM size
            if node_hash['main_memory']['pmem_size']
              pmem_size = node_hash['main_memory']['pmem_size']
              key = [{ text: G5K.get_size(pmem_size), sort: (pmem_size / 2**30).to_s.rjust(6, '0') + ' GB' }]
              init(data, 'pmem_size', key)
              data['pmem_size'][key][site_uid] += 1
            end

            # HPC Networks
            interfaces = node_hash['network_adapters'].select{ |v|
              v['enabled'] and
                (v['mounted'] or v['mountable']) and
                not v['management'] and
                (v['device'] =~ /\./).nil? # exclude PKEY / VLAN interfaces see #9417
            }.map{ |v|
              [
                {
                  text: v['interface'] + ' ' + G5K.get_rate(v['rate']),
                  sort: v['interface'] + ' ' + (v['rate'] / 10**6).to_s.rjust(6, '0') + ' Mbps'
                }
              ]
            }

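            # Tally identical interface rows: injecting into a Hash with a
            # default value of 0 counts how many ports of each type and
            # rate this node has.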
            net_interconnects = interfaces.inject(Hash.new(0)){ |h, v| h[v] += 1; h }
            net_interconnects.sort_by { |k, v|  k.first[:sort] }.each { |k, v|
              init(data, 'net_interconnects', k)
              data['net_interconnects'][k][site_uid] += v
            }

            # NIC models
            interfaces = node_hash['network_adapters'].select{ |v|
              v['enabled'] and
                (v['mounted'] or v['mountable']) and
                not v['management'] and
                (v['device'] =~ /\./).nil? # exclude PKEY / VLAN interfaces see #9417
            }.map{ |v|
              t = (v['vendor'] || 'N/A') + ' ' + (v['model'] || 'N/A')
              [
                {
                  text: v['interface'],
                  sort: v['interface']
                },
                {
                  text: v['driver'], sort: v['driver']
                },
                {
                  text: t, sort: t
                }
              ]
            }.uniq

            net_models = interfaces.inject(Hash.new(0)){ |h, v| h[v] += 1; h }
            # Sort by interface type (eth or IB) and then by driver
            net_models.sort_by { |k, v|  [k.first[:sort], k[1][:sort]] }.each { |k, v|
              init(data, 'net_models', k)
              data['net_models'][k][site_uid] += v
            }

            # NVMe SSD models
            nvme = node_hash['storage_devices'].select{ |v|
              v['interface'] == 'NVME'}.map{ |v|
              t = v['alt_model_name'] || v['model'] || 'N/A'
              [
                {
                  text: t, sort: t
                }
              ]
            }

            nvme_models = nvme.inject(Hash.new(0)){ |h, v| h[v] += 1; h }
            nvme_models.sort_by { |k, v|  k.first[:sort] }.each { |k, v|
              init(data, 'nvme_models', k)
              data['nvme_models'][k][site_uid] += v
            }

            # Accelerators
            m = node_hash['mic']

            mic_families = {}
            mic_families[[m['mic_vendor']]] = m['mic_count'] if m and m['mic']
            mic_details = {}
            mic_details[["#{m['mic_vendor']} #{m['mic_model']}"]] = [m['mic_count'], m['mic_cores']] if m and m['mic']

            lg = node_hash['gpu_devices']
            gpu_families = {}
            gpu_details = {}
            unless lg.nil?
              # gpu_devices maps device ids to per-device hashes; iterate
              # over the values only.
              lg.each_value { |d|
                vendor = d['vendor']
                model = d['model']
                nbcores = GPURef.getNumberOfCoresFor(model)

                family = gpu_families[[vendor]]
                if family.nil?
                  gpu_families[[vendor]] = 1
                else
                  gpu_families[[vendor]] += 1
                end

                details = gpu_details[["#{vendor} #{model}"]]

                if details.nil?
                  gpu_details[["#{vendor} #{model}"]] = [1, nbcores]
                else
                  gpu_details[["#{vendor} #{model}"]] = [details[0]+1, details[1]+nbcores]
                end
              }
            end

            gpu_families.merge(mic_families).sort.to_h.each { |k, v|
              init(data, 'acc_families', k)
              data['acc_families'][k][site_uid] += v
            }

            gpu_details.merge(mic_details).sort.to_h.each { |k, v|
              init(data, 'acc_models', k)
              data['acc_models'][k][site_uid] += v[0]

              init(data, 'acc_cores', k)
              data['acc_cores'][k][site_uid] += v[1]
            }

            # Nodes
            key = [cluster_hash['model']]
            init(data, 'node_models', key)
            data['node_models'][key][site_uid] += 1
          rescue StandardError => e
            puts "ERROR while processing #{node_uid}: #{e}"
            raise
          end
        }
      }
    }

    # Table construction
    generated_content = "= Processors ="
    generated_content += "\n== Processors counts per families ==\n"
    sites = @site_uids.map{ |e| "[[#{e.capitalize}:Hardware|#{e.capitalize}]]" }
    table_options = 'class="wikitable sortable" style="text-align: center;"'
    table_columns = ['Processor family'] + sites + ['Processors total']
    generated_content += MW.generate_table(table_options, table_columns, get_table_data(data, 'proc_families'))
    generated_content += "\n== Processors counts per models ==\n"
    table_columns = ['Microarchitecture', 'Processor model'] + sites + ['Processors total']
    generated_content += MW.generate_table(table_options, table_columns, get_table_data(data, 'proc_models'))
    generated_content += "\n== Cores counts per models ==\n"
    table_columns =  ['Microarchitecture', 'Core model'] + sites + ['Cores total']
    generated_content += MW.generate_table(table_options, table_columns, get_table_data(data, 'core_models'))

    generated_content += "\n= Memory =\n"
    generated_content += "\n== RAM size per node ==\n"
    table_columns = ['RAM size'] + sites + ['Nodes total']
    generated_content += MW.generate_table(table_options, table_columns, get_table_data(data, 'ram_size'))
    generated_content += "\n== PMEM size per node ==\n"
    table_columns = ['PMEM size'] + sites + ['Nodes total']
    generated_content += MW.generate_table(table_options, table_columns, get_table_data(data, 'pmem_size'))

    generated_content += "\n= Networking =\n"
    generated_content += "\n== Network interconnects ==\n"
    table_columns = ['Interconnect'] + sites + ['Cards total']
    generated_content += MW.generate_table(table_options, table_columns, get_table_data(data, 'net_interconnects'))

    generated_content += "\n== Nodes with several Ethernet interfaces ==\n"
    generated_content += generate_interfaces

    generated_content += "\n== Nodes with SR-IOV support ==\n"
    generated_content += generate_sriov_interfaces

    generated_content += "\n== Network interface models ==\n"
    table_columns = ['Type', 'Driver', 'Model'] + sites + ['Cards total']
    generated_content += MW.generate_table(table_options, table_columns, get_table_data(data, 'net_models'))

    generated_content += "\n= Storage ="
    generated_content += "\n== NVMe SSD models ==\n"
    table_columns = ['NVMe SSD models'] + sites + ['NVMe SSDs total']
    generated_content += MW.generate_table(table_options, table_columns, get_table_data(data, 'nvme_models'))
    generated_content += "\n== Nodes with several disks ==\n"
    generated_content += generate_storage
    generated_content += "\n''*: disk is [[Disk_reservation|reservable]]''"

    generated_content += "\n= Accelerators (GPU, Xeon Phi) ="
    generated_content += "\n== Accelerator families ==\n"
    table_columns = ['Accelerator family'] + sites + ['Accelerators total']
    generated_content += MW.generate_table(table_options, table_columns, get_table_data(data, 'acc_families'))
    generated_content += "\n== Accelerator models ==\n"
    table_columns = ['Accelerator model'] + sites + ['Accelerators total']
    generated_content += MW.generate_table(table_options, table_columns, get_table_data(data, 'acc_models'))
    generated_content += "\n== Accelerator cores ==\n"
    table_columns = ['Accelerator model'] + sites + ['Cores total']
    generated_content += MW.generate_table(table_options, table_columns, get_table_data(data, 'acc_cores'))

    generated_content += "\n= Nodes models =\n"
    table_columns = ['Nodes model'] + sites + ['Nodes total']
    generated_content += MW.generate_table(table_options, table_columns, get_table_data(data, 'node_models'))
    return generated_content
  end

  def init(data, key1, key2)
    unless data[key1].key?(key2)
      data[key1][key2] = {}
      @site_uids.each { |s| data[key1][key2][s] = 0 }
    end
  end

  # This method generates a wiki table from data[key] values, sorted by the
  # key values in the first column.
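  # Shape of one data[key] entry (illustrative values only):
  #   key:   [{text: 'Skylake', sort: '2016, Skylake'}, ...]
  #   value: {'grenoble' => 4, 'lille' => 0, ...}
  # Each entry becomes one row; a bold per-site totals row is appended.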
  def get_table_data(data, key)
    raw_data = []
    table_data = []
    index = 0
    k0 = 0
    # Sort the table by the row identifiers (e.g. microarchitecture, or
    # microarchitecture + CPU name). Each cell is either a plain text field
    # or a hash whose :sort key should be used for sorting.
    data[key].sort_by { |k, v|
      k.map { |c| c.kind_of?(Hash) ? c[:sort] : c }
    }.to_h.each { |k, v|
      k0 = k if index == 0
      index += 1
      elts = v.sort.to_h.values
      raw_data << elts
      table_data << k.map{ |e| e.kind_of?(Hash) ? "data-sort-value=\"#{e[:sort]}\"|#{e[:text]}" : "data-sort-value=\"#{index.to_s.rjust(3, '0')}\"|#{e}" } +
        elts.map{ |e| e.kind_of?(Hash) ? "data-sort-value=\"#{e[:sort]}\"|#{e[:text]}" : e }
        .map{ |e| e == 0 ? '' : e  } + ["'''#{elts.reduce(:+)}'''"]
    }
    elts = raw_data.transpose.map{ |e| e.reduce(:+)}
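    # Append the per-site totals row; Array#<< returns the receiver, so
    # this appended table_data is also the method's return value.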
    table_data << {columns: ["'''Sites total'''"] +
                   [' '] * (k0.length - 1) +
                   (elts + [elts.reduce(:+)]).map{ |e| e == 0 ? '' : "'''#{e}'''" },
                   sort: false}
  end

  # See: https://en.wikipedia.org/wiki/List_of_Intel_Xeon_microprocessors
  # For a correct sort of the column, all dates must be in the same
  # format (same number of digits)
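  # Examples:
  #   get_date('Skylake')  #=> '2016'
  #   get_date(nil)        #=> 'MISSING'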
  def get_date(microarchitecture)
    return 'MISSING' if microarchitecture.nil?
    release_dates = {
      'K8' => '2003',
      'K10' => '2007',
      'Clovertown' => '2006',
      'Harpertown' => '2007',
      'Dunnington' => '2008',
      'Lynnfield' => '2009',
      'Nehalem' => '2010',
      'Westmere' => '2011',
      'Sandy Bridge' => '2012',
      'Haswell' => '2013',
      'POWER8' => '2014',
      'Broadwell' => '2015',
      'Skylake' => '2016',
      'Zen' => '2017',
      'Vulcan' => '2018',
      'Zen 2' => '2019',
      'Cascade Lake-SP' => '2019',
    }
    date = release_dates[microarchitecture]
    raise "ERROR: microarchitecture not found: '#{microarchitecture}'. Add in hardware.rb" if date.nil?
    date
  end

  def generate_storage
    table_columns = ["Site", "Cluster", "Number of nodes", "Main disk", "Additional HDDs", "Additional SSDs"]
    table_data = []
    global_hash = get_global_hash

    # Loop over Grid'5000 sites
    global_hash["sites"].sort.to_h.each do |site_uid, site_hash|
      site_hash.fetch("clusters").sort.to_h.each do |cluster_uid, cluster_hash|
        nodes_data = []
        cluster_hash.fetch('nodes').sort.to_h.each do |node_uid, node_hash|
          next if node_hash['status'] == 'retired'
          sd = node_hash['storage_devices']
          reservable_disks = sd.any? { |v| v['reservation'] == true }
          maindisk = sd.find { |v| v['device'] == 'sda' } || sd.find { |v| v['device'] == 'nvme0n1' }
          maindisk_t = maindisk['storage'] + ' ' + G5K.get_size(maindisk['size'], 'metric')
          # Exclude the main disk itself (not just 'sda'), so that an NVMe
          # main disk is not also counted as an additional disk.
          other = sd.reject { |d| d['device'] == maindisk['device'] }
          hdds = other.select { |d| d['storage'] == 'HDD' }
          if hdds.count == 0
            hdd_t = "0"
          else
            hdd_t = hdds.count.to_s + " (" + hdds.map { |d|
              G5K.get_size(d['size'],'metric') +
              ((!d['reservation'].nil? && d['reservation']) ? '[[Disk_reservation|*]]' : '')
            }.join(', ') + ")"
          end
          ssds = other.select { |d| d['storage'] == 'SSD' }
          if ssds.count == 0
            ssd_t = "0"
          else
            ssd_t = ssds.count.to_s + " (" + ssds.map { |d|
              G5K.get_size(d['size'],'metric') +
              ((!d['reservation'].nil? && d['reservation']) ? '[[Disk_reservation|*]]' : '')
            }.join(', ') + ")"
          end
          nodes_data << { 'uid' => node_uid, 'data' => { 'main' => maindisk_t, 'hdd' => hdd_t, 'ssd' => ssd_t, 'reservation' => reservable_disks } }
        end
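        # Group nodes sharing the same disk layout so that each distinct
        # configuration becomes a single table row (suffixed via
        # G5K.nodeset when a cluster is heterogeneous).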
        nd = nodes_data.group_by { |d| d['data'] }
        nd.each do |data, nodes|
          # only keep nodes with more than one disk
          next if data['hdd'] == "0" and data['ssd'] == "0"
          if nd.length == 1
            nodesetname = cluster_uid
          else
            nodesetname = cluster_uid + '-' + G5K.nodeset(nodes.map { |n| n['uid'].split('-')[1].to_i })
          end
          table_data << [
            "[[#{site_uid.capitalize}:Hardware|#{site_uid.capitalize}]]",
              "[[#{site_uid.capitalize}:Hardware##{cluster_uid}|#{nodesetname}]]",
              nodes.length,
              data['main'],
              data['hdd'],
              data['ssd'],
          ]
        end
      end
    end
    # Sort by site and cluster name
    table_data.sort_by! { |row|
      [row[0], row[1]]
    }

    # Table construction
    table_options = 'class="wikitable sortable" style="text-align: center;"'
    return MW.generate_table(table_options, table_columns, table_data)
  end

  def generate_interfaces
    table_data = []
    @global_hash["sites"].sort.to_h.each { |site_uid, site_hash|
      site_hash.fetch("clusters").sort.to_h.each { |cluster_uid, cluster_hash|
        network_interfaces = {}
        cluster_hash.fetch('nodes').sort.to_h.each { |node_uid, node_hash|
          next if node_hash['status'] == 'retired'
          if node_hash['network_adapters']
            node_interfaces = node_hash['network_adapters'].select{ |v|
              v['interface'] == 'Ethernet' and
              v['enabled'] == true and
              (v['mounted'] == true or v['mountable'] == true) and
              v['management'] == false
            }

            interfaces = {}
            interfaces['25g_count'] = node_interfaces.select { |v| v['rate'] == 25_000_000_000 }.count
            interfaces['10g_count'] = node_interfaces.select { |v| v['rate'] == 10_000_000_000 }.count
            interfaces['1g_count'] = node_interfaces.select { |v| v['rate'] == 1_000_000_000 }.count
            interfaces['details'] = node_interfaces.map{ |v| v['device'] + (v['name'].nil? ? '' : '/' + v['name'])  + ' (' + G5K.get_rate(v['rate']) + ')' }.sort.join(', ')
            queues = (cluster_hash['queues'] || []) - ['admin', 'default', 'testing']
            interfaces['queues'] = queues.empty? ? '' : queues[0] + G5K.pluralize(queues.count, ' queue')
            interface_add(network_interfaces, node_uid, interfaces) if node_interfaces.count > 1
          end
        }

        # One line for each group of nodes with the same interfaces
        network_interfaces.sort.to_h.each { |num, interfaces|
          table_data << [
            "[[#{site_uid.capitalize}:Network|#{site_uid.capitalize}]]",
            "[[#{site_uid.capitalize}:Hardware##{cluster_uid}" + "|#{cluster_uid}" + (network_interfaces.size==1 ? '' : '-' + G5K.nodeset(num)) + "]]",
            num.count,
            interfaces['25g_count'].zero? ? '' : interfaces['25g_count'],
            interfaces['10g_count'].zero? ? '' : interfaces['10g_count'],
            interfaces['1g_count'].zero? ? '' : interfaces['1g_count'],
            interfaces['details']
          ]
        }
      }
    }
    # Sort by site and cluster name
    table_data.sort_by! { |row|
      [row[0], row[1]]
    }

    table_options = 'class="wikitable sortable" style="text-align: center;"'
    table_columns = ["Site", "Cluster", "Nodes", "25G interfaces", "10G interfaces", "1G interfaces", "Interfaces (throughput)"]
    MW.generate_table(table_options, table_columns, table_data)
  end

  def generate_sriov_interfaces
    table_data = []
    @global_hash["sites"].sort.to_h.each { |site_uid, site_hash|
      site_hash.fetch("clusters").sort.to_h.each { |cluster_uid, cluster_hash|
        network_interfaces = {}
        cluster_hash.fetch('nodes').sort.to_h.each { |node_uid, node_hash|
          next if node_hash['status'] == 'retired'
          if node_hash['network_adapters']
            node_interfaces = node_hash['network_adapters'].select{ |v|
              v['sriov'] and
              v['enabled'] == true and
              (v['mounted'] == true or v['mountable'] == true) and
              v['management'] == false
            }

            interfaces = {}
            interfaces['details'] = node_interfaces.map{ |v| v['device'] + (v['name'].nil? ? '' : '/' + v['name']) + " (#{v['sriov_totalvfs']} VFs)" }.sort.join(', ')
            interfaces['vfs_sum'] = node_interfaces.map{ |v| v['sriov_totalvfs'] }.sum
            interface_add(network_interfaces, node_uid, interfaces) if node_interfaces.count > 0
          end
        }

        # One line for each group of nodes with the same interfaces
        network_interfaces.sort.to_h.each { |num, interfaces|
          table_data << [
            "[[#{site_uid.capitalize}:Network|#{site_uid.capitalize}]]",
            "[[#{site_uid.capitalize}:Hardware##{cluster_uid}" + "|#{cluster_uid}" + (network_interfaces.size==1 ? '' : '-' + G5K.nodeset(num)) + "]]",
            num.count,
            "data-sort-value=\"#{interfaces['vfs_sum']}\"|#{interfaces['details']}"
          ]
        }
      }
    }
    # Sort by site and cluster name
    table_data.sort_by! { |row|
      [row[0], row[1]]
    }

    table_options = 'class="wikitable sortable" style="text-align: center;"'
    table_columns = ["Site", "Cluster", "Nodes", "Interfaces (max number of Virtual Functions)"]
    MW.generate_table(table_options, table_columns, table_data)
  end

  # This method adds the interfaces hash to the network_interfaces hash,
  # keyed by an array of node numbers. If nodes 2, 3 and 7 have the same
  # interfaces, they are gathered under a single key:
  #   network_interfaces[[2, 3, 7]] = interfaces
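  # Illustrative sequence (node uids are hypothetical):
  #   interface_add(ni, 'foo-2', ifs)  # ni == {[2] => ifs}
  #   interface_add(ni, 'foo-3', ifs)  # ni == {[2, 3] => ifs}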
  def interface_add(network_interfaces, node_uid, interfaces)
    num1 = node_uid.split('-')[1].to_i
    if network_interfaces.has_value?(interfaces)
      num2 = network_interfaces.key(interfaces)
      network_interfaces.delete(num2)
      network_interfaces[num2.push(num1)] = interfaces
    else
      network_interfaces[[num1]] = interfaces
    end
  end
end