From 00020c86f606f0c1658a037318f4dca58594baab Mon Sep 17 00:00:00 2001
From: Laurent Pouilloux <laurent.pouilloux@inria.fr>
Date: Tue, 19 Dec 2023 12:44:15 +0100
Subject: [PATCH] [gen:oar] wip refactoring

---
 lib/refrepo/gen/oar-properties.rb | 465 +++++++++++++-----------------
 1 file changed, 194 insertions(+), 271 deletions(-)

diff --git a/lib/refrepo/gen/oar-properties.rb b/lib/refrepo/gen/oar-properties.rb
index 82927e96469..638ec1ea8d7 100644
--- a/lib/refrepo/gen/oar-properties.rb
+++ b/lib/refrepo/gen/oar-properties.rb
@@ -1,48 +1,181 @@
 # coding: utf-8
 
 require 'hashdiff'
-require 'refrepo/data_loader'
 require 'net/ssh'
+require 'refrepo/data_loader'
 require 'refrepo/gpu_ref'
 
+# Define constant elements
+MiB = 1024**2
+IGNORE_DEFAULT_KEYS = [
+  "slash_16",
+  "slash_17",
+  "slash_18",
+  "slash_19",
+  "slash_20",
+  "slash_21",
+  "slash_22",
+  "available_upto",
+  "chunks",
+  "comment", # TODO
+  "core", # This property was created by 'oar_resources_add'
+  "cpu", # This property was created by 'oar_resources_add'
+  "host", # This property was created by 'oar_resources_add'
+  "gpudevice", # New property taken into account by the new generator
+  "gpu", # New property taken into account by the new generator
+  "cpuset",
+  "desktop_computing",
+  "drain",
+  "expiry_date",
+  "finaud_decision",
+  "grub",
+  "jobs", # This property exists when a job is running
+  "last_available_upto",
+  "last_job_date",
+  "network_address", # TODO
+  "next_finaud_decision",
+  "next_state",
+  "rconsole", # TODO
+  "resource_id",
+  "scheduler_priority",
+  "state",
+  "state_num",
+  "subnet_address",
+  "subnet_prefix",
+  "suspended_jobs",
+  "thread",
+  "type", # TODO
+  "vlan",
+  "pdu",
+  "id", # id from API (= resource_id from oarnodes)
+  "api_timestamp", # from API
+  "links", # from API
+]
+IGNORE_DISK_KEYS = ["disk", "diskpath"]
+IGNORE_KEYS = IGNORE_DEFAULT_KEYS + IGNORE_DISK_KEYS
+OAR_SYSTEM_KEYS = ['deploy', 'besteffort']
+
 # TODO for gpu_model (and others?) use NULL instead of empty string
 
 class MissingProperty < StandardError; end
 
-MiB = 1024**2
+module RefRepo::Gen::OarProperties
 
-############################################
-# Functions related to the "TABLE" operation
-############################################
+  # OAR API data cache
+  @@oar_data = {}
+
+  def self.generate_oar_properties(options)
+    # MAIN FUNCTION
+    # This function is called from RAKE and is in charge of
+    #   - printing OAR commands to
+    #      > add a new cluster
+    #      > update and existing cluster
+    #   - execute these commands on an OAR server
+
+    # Reset the OAR API cache, because the rspec tests change the data in our back
+    # while calling multiple times this function.
+    @@oar_data = {}
+
+    options[:api] ||= {}
+    conf = RefRepo::Utils.get_api_config
+    options[:api][:user] = conf['username']
+    options[:api][:pwd] = conf['password']
+    options[:api][:uri] = conf['uri']
+    options[:ssh] ||= {}
+    options[:ssh][:user] ||= 'g5kadmin'
+    options[:ssh][:host] ||= 'oar.%s.g5kadmin'
+
+    ############################################
+    # Fetch:
+    # 1) generated data from load_data_hierarchy
+    # 2) oar properties from the reference repository
+    ############################################
+
+    # Load the description from the ref-api (data/ dir)
+    data = load_data_hierarchy
+
+    # loop over the sites
+    options[:sites].each do |site|
+      site_data = data['sites'][site]
+      # Replace the site placeholder of ssh hosts by the site
+      options[:ssh][:host] = options[:ssh][:host].gsub('%s', site)
+      # If no cluster is given, retrieve the site clusters, 
+      site_clusters = site_data['clusters'].keys
+      clusters = options[:clusters].length == 0 ? site_clusters : options[:clusters].select{|c| site_clusters.include? c}
+      if clusters.length == 0
+        p "no cluster found for site #{site}"
+        next
+      end
+      # Generate OAR properties for refrepo
+      refrepo_properties = {}
+
+    exit
+  end
+
+  exit
+  # convert to OAR properties
+  #  = get_oar_properties_from_the_ref_repo(data_hierarchy, {
+  #     :sites => [site_name]
+  # })
+
+  # also fetch the resources hierarchy inside nodes (cores, gpus, etc.)
+  generated_hierarchy = extract_clusters_description(clusters,
+                                                       site_name,
+                                                       options,
+                                                       data_hierarchy,
+                                                       refrepo_properties[site_name])
+
+  ############################################
+  # Output generated information
+  ############################################
+
+  ret = 0
+
+  # DO=table
+  if options.key? :table and options[:table]
+    display_table(generated_hierarchy)
+  end
+
+  # Do=Diff
+  if options.key? :diff and options[:diff]
+    ret = do_diff(options, generated_hierarchy, refrepo_properties)
+  end
+
+  # DO=print
+  if options.key? :print and options[:print]
+    cmds = export_rows_as_oar_command(generated_hierarchy, site_name, refrepo_properties[site_name], data_hierarchy)
+
+    puts(cmds)
+  end
 
-module OarProperties
 
-# OAR API data cache
-@@oar_data = {}
+  # Do=update
+  if options[:update]
+    printf 'Apply changes to the OAR server ' + options[:ssh][:host].gsub('%s', site_name) + ' ? (y/N) '
+    prompt = STDIN.gets.chomp
+    cmds = export_rows_as_oar_command(generated_hierarchy, site_name, refrepo_properties[site_name], data_hierarchy)
+    run_commands_via_ssh(cmds, options) if prompt.downcase == 'y'
+  end
+
+  return ret
+end
 
-def export_rows_as_formated_line(generated_hierarchy)
+end # Module
+
+
+# Functions related to the "TABLE" operation
+def display_table(generated_hierarchy)
   # Display header
-  puts "+#{'-' * 10} + #{'-' * 20} + #{'-' * 5} + #{'-' * 5} + #{'-' * 8} + #{'-' * 4} + #{'-' * 20} + #{'-' * 30} + #{'-' * 30}+"
-  puts "|#{'cluster'.rjust(10)} | #{'host'.ljust(20)} | #{'cpu'.ljust(5)} | #{'core'.ljust(5)} | #{'cpuset'.ljust(8)} | #{'gpu'.ljust(4)} | #{'gpudevice'.ljust(20)} | #{'cpumodel'.ljust(30)} | #{'gpumodel'.ljust(30)}|"
-  puts "+#{'-' * 10} + #{'-' * 20} + #{'-' * 5} + #{'-' * 5} + #{'-' * 8} + #{'-' * 4} + #{'-' * 20} + #{'-' * 30} + #{'-' * 30}+"
-
-  oar_rows = generated_hierarchy[:nodes].map{|node| node[:oar_rows]}.flatten
-
-  # Display rows
-  oar_rows.each do |row|
-    cluster = row[:cluster].to_s
-    host = row[:host].to_s
-    cpu = row[:cpu].to_s
-    core = row[:core].to_s
-    cpuset = row[:cpuset].to_s
-    gpu = row[:gpu].to_s
-    gpudevice = row[:gpudevice].to_s
-    cpumodel = row[:cpumodel].to_s
-    gpumodel = row[:gpumodel].to_s
-    puts "|#{cluster.rjust(10)} | #{host.ljust(20)} | #{cpu.ljust(5)} | #{core.ljust(5)} | #{cpuset.ljust(8)} | #{gpu.ljust(4)} | #{gpudevice.ljust(20)} | #{cpumodel.ljust(30)} | #{gpumodel.ljust(30)}|"
+  cols = {cluster: 10, host: 20, cpu: 5, core: 5, cpuset: 8, gpu: 4, gpudevice: 20, cpumodel: 30, gpumodel: 30}
+  puts "+ #{cols.map{|_k, v| '-' * v }.join(' + ')} +"
+  puts "| #{cols.map{|k, v| k.to_s.ljust(v)}.join(' | ')} |"
+  puts "+ #{cols.map{|_k, v| '-' * v }.join(' + ')} +"
+
+  generated_hierarchy[:nodes].map{|node| node[:oar_rows]}.flatten.each do |row|
+    puts "| #{cols.map{|k, v| row[k].to_s.ljust(v)}} |"
   end
   # Display footer
-  puts "+#{'-' * 10} + #{'-' * 20} + #{'-' * 5} + #{'-' * 5} + #{'-' * 8} + #{'-' * 4} + #{'-' * 20} + #{'-' * 30} + #{'-' * 30}+"
+  puts "+ #{cols.map{|_k, v| '-' * v }.join(' + ')} +"
 end
 
 ############################################
@@ -85,9 +218,8 @@ end
 
 def generate_create_oar_property_cmd(properties_keys)
   command = ''
-  ignore_keys_list = ignore_default_keys()
   properties_keys.each do |key, key_type|
-    if ignore_keys_list.include?(key)
+    if IGNORE_DEFAULT_KEYS.include?(key)
       next
     end
     # keys such as deploy or besteffort are default OAR keys that should not be created
@@ -375,6 +507,11 @@ def get_ref_node_properties_internal(cluster_uid, cluster, node_uid, node)
   h['disktype'] = [node['storage_devices'].first['interface'], node['storage_devices'].first['storage']].join('/')
   h['chassis'] = [node['chassis']['manufacturer'], node['chassis']['name'], node['chassis']['serial']].join(' ')
 
+  # memory by node/cpu/core
+  h['memnode'] = node['main_memory']['ram_size'] / MiB
+  h['memcpu'] = node['main_memory']['ram_size'] / node['architecture']['nb_procs']/MiB
+  h['memcore'] = node['main_memory']['ram_size'] / node['architecture']['nb_cores']/MiB
+  
   # ETH
   ni_mountable = node['network_adapters'].select { |na| /^eth[0-9]*$/.match(na['device']) && (na['enabled'] == true && (na['mounted'] == true || na['mountable'] == true)) }
   ni_fastest   = ni_mountable.max_by { |na| na['rate'] || 0 }
@@ -407,7 +544,6 @@ def get_ref_node_properties_internal(cluster_uid, cluster, node_uid, node)
 
   puts "#{node_uid}: Warning - no rate info for the opa interface" if h['opa_count'] > 0 && h['opa_rate'] == 0
 
-
   # MYRINET
   ni_mountable = node['network_adapters'].select { |na| /^myri[0-9]*$/.match(na['device']) && (na['enabled'] == true && (na['mounted'] == true || na['mountable'] == true)) }
   ni_fastest   = ni_mountable.max_by { |na| na['rate'] || 0 }
@@ -419,11 +555,9 @@ def get_ref_node_properties_internal(cluster_uid, cluster, node_uid, node)
 
   puts "#{node_uid}: Warning - no rate info for the myri interface" if h['myri_count'] > 0 && h['myri_rate'] == 0
 
-  h['memcore'] = node['main_memory']['ram_size'] / node['architecture']['nb_cores']/MiB
-  h['memcpu'] = node['main_memory']['ram_size'] / node['architecture']['nb_procs']/MiB
-  h['memnode'] = node['main_memory']['ram_size'] / MiB
-
+  # GPU
   h['gpu_model'] = ''
+  h['gpu_mem'] = 0
   h['gpu_count'] = 0
 
   if node.key?('gpu_devices')
@@ -434,63 +568,35 @@ def get_ref_node_properties_internal(cluster_uid, cluster, node_uid, node)
     device = node['gpu_devices'].first[1]
     if GPURef.is_gpu_supported?(device)
       h['gpu_model'] = device['model']
+      h['gpu_mem'] = device['memory'] / MiB
       h['gpu_count'] = node['gpu_devices'].length
     end
   end
 
-  if node.key?('exotic')
-    h['exotic'] = node['exotic']
-  else
-    h['exotic'] = false
-  end
-
-  h['mic'] = if node['mic']
-               'YES'
-             else
-               'NO'
-             end
+  # MIC
+  h['mic'] = node['mic']
 
+  # power monitoring
   h['wattmeter'] = cluster.fetch('metrics', []).any?{|metric| metric['name'].match(/wattmetre_power_watt|pdu_outlet_power_watt/)} ? "YES" : "NO"
 
   h['cluster_priority'] = (cluster['priority'] || Time.parse(cluster['created_at'].to_s).strftime('%Y%m')).to_i
 
-  h['max_walltime'] = 0 # default
-  h['max_walltime'] = node['supported_job_types']['max_walltime'] if node['supported_job_types'] && node['supported_job_types'].has_key?('max_walltime')
-
-  h['production'] = get_production_property(node)
-  h['maintenance'] = get_maintenance_property(node)
+  # Exotic, Queue, maintenance,  walltime
+  h['exotic'] = node['exotic']
+  h['production'] = node['supported_job_types']['queues'].include?('production')
+  h['maintenance'] = node['supported_job_types']['queues'].include?('testing')
+  h['max_walltime'] = node['supported_job_types']['max_walltime']
 
   # Disk reservation
   h['disk_reservation_count'] = node['storage_devices'].select { |v| v['reservation'] }.length
 
   # convert booleans to YES/NO string
-  h.each do |k, v|
-    if v == true
-      h[k] = 'YES'
-    elsif v == false
-      h[k] = 'NO'
-    elsif v.is_a? Float
-      h[k] = v.to_s
-    end
-  end
-
+  h.select{|_k, v| [true, false].include? v}.map{|k, v| v ? 'YES' : 'NO' }
+  h.select{|_k, v| v.is_a? Float}.map{|k, v| v.to_s }
+  
   return h
 end
 
-def get_production_property(node)
-  production = false # default
-  production = node['supported_job_types']['queues'].include?('production') if node['supported_job_types'] && node['supported_job_types'].has_key?('queues')
-  production = production == true ? 'YES' : 'NO'
-  return production
-end
-
-def get_maintenance_property(node)
-  maintenance = false # default
-  maintenance = node['supported_job_types']['queues'].include?('testing') if node['supported_job_types'] && node['supported_job_types'].has_key?('queues')
-  maintenance = maintenance == true ? 'YES' : 'NO'
-  return maintenance
-end
-
 # Return a list of properties as a hash: { property1 => String, property2 => Integer, ... }
 # We detect the type of the property (Integer/String) by looking at the existing values
 def get_property_keys(properties)
@@ -504,7 +610,7 @@ end
 def properties_internal(properties)
   str = properties
             .to_a
-            .select{|k, _v| not ignore_default_keys.include? k}
+            .select{|k, _v| not IGNORE_DEFAULT_KEYS.include? k}
             .map do |(k, v)|
                 v = "YES" if v == true
                 v = "NO"  if v == false
@@ -652,10 +758,10 @@ def diff_properties(type, properties_oar, properties_ref)
   properties_ref ||= {}
 
   if type == 'default'
-    ignore_keys = ignore_keys()
+    ignore_keys = IGNORE_KEYS
   elsif type == 'disk'
     check_keys = %w(cluster host network_address available_upto deploy production maintenance disk diskpath cpuset)
-    ignore_keys = ignore_keys() - check_keys #Some key must be ignored for default but not for disks, ex: available_upto
+    ignore_keys = IGNORE_KEYS - check_keys #Some key must be ignored for default but not for disks, ex: available_upto
   end
   ignore_keys.each { |key| properties_oar.delete(key) }
   ignore_keys.each { |key| properties_ref.delete(key) }
@@ -663,93 +769,6 @@ def diff_properties(type, properties_oar, properties_ref)
   return Hashdiff.diff(properties_oar, properties_ref)
 end
 
-# These keys will not be created neither compared with the -d option
-# ignore_default_keys is only applied to resources of type 'default'
-def ignore_default_keys()
-  # default OAR at resource creation:
-  #  available_upto: '2147483647'
-  #  besteffort: 'YES'
-  #  core: ~
-  #  cpu: ~
-  #  cpuset: 0
-  #  deploy: 'NO'
-  #  desktop_computing: 'NO'
-  #  drain: 'NO'
-  #  expiry_date: 0
-  #  finaud_decision: 'YES'
-  #  host: ~
-  #  last_available_upto: 0
-  #  last_job_date: 0
-  #  network_address: server
-  #  next_finaud_decision: 'NO'
-  #  next_state: UnChanged
-  #  resource_id: 9
-  #  scheduler_priority: 0
-  #  state: Suspected
-  #  state_num: 3
-  #  suspended_jobs: 'NO'
-  #  type: default
-  ignore_default_keys = [
-    "slash_16",
-    "slash_17",
-    "slash_18",
-    "slash_19",
-    "slash_20",
-    "slash_21",
-    "slash_22",
-    "available_upto",
-    "chunks",
-    "comment", # TODO
-    "core", # This property was created by 'oar_resources_add'
-    "cpu", # This property was created by 'oar_resources_add'
-    "host", # This property was created by 'oar_resources_add'
-    "gpudevice", # New property taken into account by the new generator
-    "gpu", # New property taken into account by the new generator
-    "cpuset",
-    "desktop_computing",
-    "drain",
-    "expiry_date",
-    "finaud_decision",
-    "grub",
-    "jobs", # This property exists when a job is running
-    "last_available_upto",
-    "last_job_date",
-    "network_address", # TODO
-    "next_finaud_decision",
-    "next_state",
-    "rconsole", # TODO
-    "resource_id",
-    "scheduler_priority",
-    "state",
-    "state_num",
-    "subnet_address",
-    "subnet_prefix",
-    "suspended_jobs",
-    "thread",
-    "type", # TODO
-    "vlan",
-    "pdu",
-    "id", # id from API (= resource_id from oarnodes)
-    "api_timestamp", # from API
-    "links", # from API
-  ]
-  return ignore_default_keys
-end
-
-# Properties of resources of type 'disk' to ignore (for example, when
-# comparing resources of type 'default' with the -d option)
-def ignore_disk_keys()
-  ignore_disk_keys = [
-    "disk",
-    "diskpath"
-  ]
-  return ignore_disk_keys
-end
-
-def ignore_keys()
-  return ignore_default_keys() + ignore_disk_keys()
-end
-
 # Properties such as deploy and besteffort, that should not be created
 def oar_system_keys()
   [
@@ -1084,6 +1103,7 @@ def extract_clusters_description(clusters, site_name, options, data_hierarchy, s
   ############################################
 
   oar_resources = get_oar_resources_from_oar(options)
+  p 
 
   generated_hierarchy = {
       :nodes => []
@@ -1100,7 +1120,9 @@ def extract_clusters_description(clusters, site_name, options, data_hierarchy, s
       "core" => site_resources.length > 0 ? site_resources.map{|r| r["core"]}.max : 0,
       "gpu" => site_resources.length > 0 ? site_resources.map{|r| r["gpu"]}.select{|x| not x.nil?}.max : 0
   }
+  p next_rsc_ids["gpu"]
 
+  exit
   # Some existing cluster have GPUs, but no GPU ID has been allocated to them
   if next_rsc_ids["gpu"].nil?
     next_rsc_ids["gpu"] = 0
@@ -1124,10 +1146,11 @@ def extract_clusters_description(clusters, site_name, options, data_hierarchy, s
 
     node_count = cluster_nodes.length
 
-    cluster_resources = site_resources
-                            .select{|r| r["cluster"] == cluster_name}
-                            .select{|r| cluster_nodes.include?(r["host"].split(".")[0])}
-                            .sort_by{|r| [r["cpu"], r["core"]]}
+    pp site_resources
+    exit
+    cluster_resources = site_resources.select{|r| r["cluster"] == cluster_name}.select{|r| cluster_nodes.include?(r["host"].split(".")[0])}.sort_by{|r| [r["cpu"], r["core"]]}
+    pp cluster_resources
+    exit
 
     sanity_check_result = sanity_check(cluster_resources, site_resources)
     unless sanity_check_result
@@ -1137,6 +1160,8 @@ def extract_clusters_description(clusters, site_name, options, data_hierarchy, s
 
     first_node = cluster_nodes.first[1]
 
+    pp first_node
+    exit
     cpu_count = first_node['architecture']['nb_procs']
     cpu_core_count = first_node['architecture']['nb_cores'] / cpu_count
     cpu_thread_count = first_node['architecture']['nb_threads'] / cpu_count
@@ -1346,105 +1371,3 @@ def extract_clusters_description(clusters, site_name, options, data_hierarchy, s
   return generated_hierarchy
 end
 
-############################################
-# MAIN function
-############################################
-
-# This function is called from RAKE and is in charge of
-#   - printing OAR commands to
-#      > add a new cluster
-#      > update and existing cluster
-#   - execute these commands on an OAR server
-
-def generate_oar_properties(options)
-
-  # Reset the OAR API cache, because the rpec tests change the data in our back
-  # while calling multiple times this function.
-  @@oar_data = {}
-
-  options[:api] ||= {}
-  conf = RefRepo::Utils.get_api_config
-  options[:api][:user] = conf['username']
-  options[:api][:pwd] = conf['password']
-  options[:api][:uri] = conf['uri']
-  options[:ssh] ||= {}
-  options[:ssh][:user] ||= 'g5kadmin'
-  options[:ssh][:host] ||= 'oar.%s.g5kadmin'
-  options[:sites] = [options[:site]] # for compatibility with other generators
-
-  ############################################
-  # Fetch:
-  # 1) generated data from load_data_hierarchy
-  # 2) oar properties from the reference repository
-  ############################################
-
-  # Load the description from the ref-api (data/ dir)
-  data_hierarchy = load_data_hierarchy
-
-  # filter based on site/cluster
-  site_name = options[:site]
-
-  # Replace the site placeholder of ssh hosts by the site
-  options[:ssh][:host] = options[:ssh][:host].gsub('%s', site_name)
-
-  # If no cluster is given, then the clusters are the cluster of the given site
-  if not options.key? :clusters or options[:clusters].length == 0
-    if data_hierarchy['sites'].key? site_name
-      clusters = data_hierarchy['sites'][site_name]['clusters'].keys
-      options[:clusters] = clusters
-    else
-      raise("The provided site does not exist : I can't detect clusters")
-    end
-  else
-    clusters = options[:clusters]
-  end
-
-  # convert to OAR properties
-  refrepo_properties = get_oar_properties_from_the_ref_repo(data_hierarchy, {
-      :sites => [site_name]
-  })
-
-  # also fetch the resources hierarchy inside nodes (cores, gpus, etc.)
-  generated_hierarchy = extract_clusters_description(clusters,
-                                                       site_name,
-                                                       options,
-                                                       data_hierarchy,
-                                                       refrepo_properties[site_name])
-
-  ############################################
-  # Output generated information
-  ############################################
-
-  ret = 0
-
-  # DO=table
-  if options.key? :table and options[:table]
-    export_rows_as_formated_line(generated_hierarchy)
-  end
-
-  # Do=Diff
-  if options.key? :diff and options[:diff]
-    ret = do_diff(options, generated_hierarchy, refrepo_properties)
-  end
-
-  # DO=print
-  if options.key? :print and options[:print]
-    cmds = export_rows_as_oar_command(generated_hierarchy, site_name, refrepo_properties[site_name], data_hierarchy)
-
-    puts(cmds)
-  end
-
-
-  # Do=update
-  if options[:update]
-    printf 'Apply changes to the OAR server ' + options[:ssh][:host].gsub('%s', site_name) + ' ? (y/N) '
-    prompt = STDIN.gets.chomp
-    cmds = export_rows_as_oar_command(generated_hierarchy, site_name, refrepo_properties[site_name], data_hierarchy)
-    run_commands_via_ssh(cmds, options) if prompt.downcase == 'y'
-  end
-
-  return ret
-end
-
-end # Module
-include OarProperties
-- 
GitLab