From 0477bdb8c736b48a17609d6c78ca619d7d5d21bf Mon Sep 17 00:00:00 2001
From: Lucas Nussbaum <lucas.nussbaum@inria.fr>
Date: Wed, 26 Mar 2025 15:48:28 +0100
Subject: [PATCH] [lib] add valid:duplicate-values: check for duplicate MAC,
 IP, IPv6 in generated data

---
 .gitlab-ci.yml                        |  1 +
 Rakefile                              | 13 ++++++-
 lib/refrepo/valid/duplicate_values.rb | 55 +++++++++++++++++++++++++++
 3 files changed, 68 insertions(+), 1 deletion(-)
 create mode 100644 lib/refrepo/valid/duplicate_values.rb

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 5d03a2828f..4f7042a2e4 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -38,6 +38,7 @@ validate-input-data:
     - bundle exec rake valid:schema
     - bundle exec rake valid:duplicates
     - bundle exec rake valid:required-unwanted-files
+    - bundle exec rake valid:duplicate-values
 
 wikigen:
   extends: .template-refrepo
diff --git a/Rakefile b/Rakefile
index e9afa9e612..2497d3828a 100644
--- a/Rakefile
+++ b/Rakefile
@@ -40,6 +40,7 @@ namespace :valid do
     require 'refrepo/valid/homogeneity'
     require 'refrepo/valid/input/duplicates'
     require 'refrepo/valid/input/schema'
+    require 'refrepo/valid/duplicate_values'
     options = {}
     options[:sites] = ( ENV['SITE'] ? ENV['SITE'].split(',') : G5K_SITES )
     options[:clusters] = ( ENV['CLUSTER'] ? ENV['CLUSTER'].split(',') : [] )
@@ -51,7 +52,9 @@ namespace :valid do
     ret2 = yaml_input_find_duplicates(options)
     puts "# Checking schema ..."
     ret3 = yaml_input_schema_validator(options)
-    exit(ret1 && ret2 && ret3)
+    puts "# Checking duplicate values ..."
+    ret4 = check_duplicate_values(options)
+    exit(ret1 && ret2 && ret3 && ret4)
   end
 
   desc "Check homogeneity of clusters -- parameters: [SITE={grenoble,..}] [CLUSTER={yeti,..}] [VERBOSE=1]"
@@ -77,6 +80,14 @@ namespace :valid do
     exit(ret)
   end
 
+  desc "Check for duplicates values in some fields that should be globally unique -- parameters: [SITE={grenoble..}]"
+  task "duplicate-values" do
+    require 'refrepo/valid/duplicate_values'
+    options = {}
+    ret = check_duplicate_values(options)
+    exit(ret)
+  end
+
   desc "Check input data schema validity -- parameters: [SITE={grenoble,..}] [CLUSTER={yeti,..}]"
   task "schema" do
     require 'refrepo/valid/input/schema'
diff --git a/lib/refrepo/valid/duplicate_values.rb b/lib/refrepo/valid/duplicate_values.rb
new file mode 100644
index 0000000000..fc44c48db2
--- /dev/null
+++ b/lib/refrepo/valid/duplicate_values.rb
@@ -0,0 +1,55 @@
+require 'refrepo/data_loader'
+def check_duplicate_values(_options)
+
+  netifs = []
+  refapi = load_data_hierarchy
+  refapi['sites'].each_pair do |site_uid, site|
+    site['clusters'].each_pair do |cluster_uid, cluster|
+      cluster['nodes'].to_h.each do |node_uid, node|
+        node['network_adapters'].each do |na|
+          netifs << { :site => site_uid,
+                      :cluster => cluster_uid,
+                      :node => node_uid,
+                      :iface => na['name'],
+                      :ip => na['ip'],
+                      :ip6 => na['ip6'],
+                      :mac => na['mac'].downcase,
+                      :mounted => na['mounted'],
+                      :mountable => na['mountable']
+          }
+        end
+      end
+    end
+  end
+
+  ret = true
+
+  dupe_macs = netifs.group_by { |e| e[:mac] }.to_a.select { |e| e[1].length > 1 }
+  if not dupe_macs.empty?
+    ret = false
+    dupe_macs.each do |e|
+      puts "ERROR: MAC #{e[0]} is used by several nodes:"
+      puts e[1].map { |n| n.to_s }.join("\n")
+    end
+  end
+
+  dupe_ips = netifs.select { |e| e[:ip] }.group_by { |e| e[:ip] }.to_a.select { |e| e[1].length > 1 }
+  if not dupe_ips.empty?
+    ret = false
+    dupe_ips.each do |e|
+      puts "ERROR: IP #{e[0]} is used by several nodes:"
+      puts e[1].map { |n| n.to_s }.join("\n")
+    end
+  end
+
+  dupe_ip6s = netifs.select { |e| e[:ip6] }.group_by { |e| e[:ip6] }.to_a.select { |e| e[1].length > 1 }
+  if not dupe_ips.empty?
+    ret = false
+    dupe_ip6s.each do |e|
+      puts "ERROR: IPv6 #{e[0]} is used by several nodes:"
+      puts e[1].map { |n| n.to_s }.join("\n")
+    end
+  end
+
+  return ret
+end
-- 
GitLab