diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 6e8153beb27a502234bc0cbe741589a9c2f01888..361067d17b8dc9ad3e102a8d0998eeaa3496ca12 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -37,6 +37,7 @@ validate-data:
   script:
     - bundle exec rake valid:schema
     - bundle exec rake valid:duplicates
+    - bundle exec rake valid:required-unwanted-files
 
 wikigen:
   extends: .template-refrepo
diff --git a/Rakefile b/Rakefile
index 91f41906472b1b0f941e91dd4edb63610c24039b..ca90137f31fad67aca7e179621d3f4308504d69c 100644
--- a/Rakefile
+++ b/Rakefile
@@ -68,6 +68,16 @@ namespace :valid do
     exit(ret)
   end
 
+  desc "Check for required and unwanted files in input/ -- parameters: [SITE={grenoble,...}] [CLUSTER={yeti,...}]"
+  task "required-unwanted-files" do
+    require 'refrepo/valid/input/required_unwanted_files'
+    # SITE and CLUSTER are optional comma-separated filters; default is all G5K_SITES and no cluster filter.
+    sites = ENV['SITE'] ? ENV['SITE'].split(',') : G5K_SITES
+    clusters = ENV['CLUSTER'] ? ENV['CLUSTER'].split(',') : []
+    # The checker returns true/false, which exit() turns into a success/failure exit status.
+    exit(yaml_input_required_unwanted_files(sites: sites, clusters: clusters))
+  end
+
   desc "Check OAR properties -- parameters: [SITE={grenoble,...}] [CLUSTER={yeti,...}] [VERBOSE=1]"
   task "oar-properties" do
     require 'refrepo/valid/oar-properties'
diff --git a/lib/refrepo/valid/input/required_unwanted_files.rb b/lib/refrepo/valid/input/required_unwanted_files.rb
new file mode 100644
index 0000000000000000000000000000000000000000..aca7839fc3f49e31390ee477503b74b233367e4f
--- /dev/null
+++ b/lib/refrepo/valid/input/required_unwanted_files.rb
@@ -0,0 +1,61 @@
+# Check that input/ holds every required file and no unexpected one.
+#
+# For each selected site, every entry of the site directory other than
+# clusters, networks, servers, pdus.yaml and <site_uid>.yaml is reported.
+# For each selected cluster, every entry of the cluster directory other than
+# nodes, pdus.yaml and <cluster_uid>[_suffix].yaml[.erb] (known suffixes only)
+# is reported, as is every non-retired node lacking nodes/<node_uid>.yaml.
+#
+# options[:sites]    - site uids to check (nil: all sites)
+# options[:clusters] - cluster uids to check (nil or empty: all clusters)
+#
+# Prints one "ERROR: ..." line per problem. Returns true when nothing was
+# reported, false otherwise.
+def yaml_input_required_unwanted_files(options)
+  global_hash = load_yaml_file_hierarchy
+  sites = options[:sites]
+  clusters = options[:clusters]
+  input_dir = File.expand_path("../../../../input", File.dirname(__FILE__))
+
+  r = true
+
+  global_hash["sites"].each do |site_uid, site|
+    next if sites && !sites.include?(site_uid)
+    site_input_dir = File.expand_path("grid5000/sites/#{site_uid}", input_dir)
+
+    allowed_site_entries = %w{. .. clusters networks servers pdus.yaml} + ["#{site_uid}.yaml"]
+    (Dir.entries(site_input_dir) - allowed_site_entries).each do |f|
+      puts "ERROR: Unwanted file #{f} in #{site_input_dir}"
+      r = false
+    end
+
+    site.fetch("clusters", {}).each do |cluster_uid, cluster|
+      next if clusters && !clusters.empty? && !clusters.include?(cluster_uid)
+      cluster_input_dir = File.expand_path("clusters/#{cluster_uid}", site_input_dir)
+
+      # Escape the uid and the literal dots and anchor on the whole string, so
+      # that a name such as "<cluster_uid>_pdusXyaml" is not accepted.
+      cluster_file_re = /\A#{Regexp.escape(cluster_uid)}(|_metrics|_pdus|_pdu|_retired|_ib|_extra)\.yaml(\.erb)?\z/
+      Dir.entries(cluster_input_dir).each do |f|
+        next if %w{. .. nodes pdus.yaml}.include?(f)
+        next if f =~ cluster_file_re
+        puts "ERROR: Unwanted file #{f} in #{cluster_input_dir}"
+        r = false
+      end
+
+      # Tolerate a cluster entry without a "nodes" key: treat it as having no
+      # node instead of raising NoMethodError on nil.
+      (cluster["nodes"] || {}).each do |node_uid, node|
+        # Only non-retired nodes are required to have a per-node YAML file.
+        next if File.exist?(File.expand_path("nodes/#{node_uid}.yaml", cluster_input_dir))
+        next if node['status'] == 'retired'
+        puts "ERROR: Missing nodes/#{node_uid}.yaml in #{cluster_input_dir}"
+        r = false
+      end
+    end
+  end
+  unless r
+    puts "Missing or unwanted files detected. This is OK if you are in the early stage of a cluster integration, but must be fixed before merging to master."
+  end
+  r
+end