diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 6e8153beb27a502234bc0cbe741589a9c2f01888..60a2f09a28c22a2ae976c0e4fd7696b2b7b54169 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -1,10 +1,9 @@
 ---
 stages:
+  - deploy-to-api-servers
   - lint
-  - validate
-  - generate
-  - deploy
-  - checks
+  - check
+  - generate-access-history
 
 include:
   - project: 'grid5000/grid5000-gitlab-templates'
@@ -31,16 +30,17 @@ include:
     - wget --no-check-certificate -q https://www.grid5000.fr/certs/ca2019.grid5000.fr.crt -O /usr/local/share/ca-certificates/ca2019.grid5000.fr.crt
     - /usr/sbin/update-ca-certificates
 
-validate-data:
+validate-input-data:
   extends: .template-refrepo
-  stage: validate
+  stage: check
   script:
     - bundle exec rake valid:schema
     - bundle exec rake valid:duplicates
+    - bundle exec rake valid:required-unwanted-files
 
 wikigen:
   extends: .template-refrepo
-  stage: checks
+  stage: check
   allow_failure: true
   parallel:
     matrix:
@@ -57,9 +57,9 @@ wikigen:
     refs:
       - master
 
-generate-reference-api:
+check-no-changes-in-data:
   extends: .template-refrepo
-  stage: generate
+  stage: check
   script:
     - export TZ=Europe/Paris
     - bundle exec rake reference-api
@@ -70,7 +70,7 @@ generate-reference-api:
 
 generate-access-history:
   extends: .template-refrepo
-  stage: generate
+  stage: generate-access-history
   script:
     - export TZ=Europe/Paris
     - bundle exec rake gen:accesses-history
@@ -79,22 +79,22 @@ generate-access-history:
       - data/grid5000/accesses/accesses_mode_history.yaml
     expire_in: 1 week
 
-deploy:
-  stage: deploy
+deploy-to-api-servers:
+  stage: deploy-to-api-servers
   tags:
     - grid5000-shell
   script:
     - /srv/ci-runner-scripts/bin/update-api-servers
 
 rspec:
-  stage: checks  # we use 'checks' here to avoid blocking on this when updating the ref-repo
+  stage: check  # we use 'check' here to avoid blocking on this when updating the ref-repo
   extends: .template-refrepo
   script:
     - export TZ=Europe/Paris
     - bundle exec rspec
 
 valid-homogeneity:
-  stage: checks
+  stage: check
   extends: .template-refrepo
   script:
     - bundle exec rake valid:homogeneity
diff --git a/Rakefile b/Rakefile
index 91f41906472b1b0f941e91dd4edb63610c24039b..ca90137f31fad67aca7e179621d3f4308504d69c 100644
--- a/Rakefile
+++ b/Rakefile
@@ -68,6 +68,16 @@ namespace :valid do
     exit(ret)
   end
 
+  desc "Check for required and unwanted files in input/"
+  task "required-unwanted-files" do
+    require 'refrepo/valid/input/required_unwanted_files'
+    options = {}
+    options[:sites] = ( ENV['SITE'] ? ENV['SITE'].split(',') : G5K_SITES )
+    options[:clusters] = ( ENV['CLUSTER'] ? ENV['CLUSTER'].split(',') : [] )
+    ret = yaml_input_required_unwanted_files(options)
+    exit(ret)
+  end
+
   desc "Check OAR properties -- parameters: [SITE={grenoble,...}] [CLUSTER={yeti,...}] [VERBOSE=1]"
   task "oar-properties" do
     require 'refrepo/valid/oar-properties'
diff --git a/lib/refrepo/valid/input/required_unwanted_files.rb b/lib/refrepo/valid/input/required_unwanted_files.rb
new file mode 100644
index 0000000000000000000000000000000000000000..aca7839fc3f49e31390ee477503b74b233367e4f
--- /dev/null
+++ b/lib/refrepo/valid/input/required_unwanted_files.rb
@@ -0,0 +1,42 @@
+# Reports unwanted files and missing per-node YAML files under input/.
+# options[:sites]: site uids to check (nil = all); options[:clusters]: cluster uids (nil or empty = all).
+# Prints one ERROR line per problem and returns true when nothing was found, false otherwise.
+def yaml_input_required_unwanted_files(options)
+  global_hash = load_yaml_file_hierarchy
+  sites = options[:sites]
+  clusters = options[:clusters]
+  input_dir = File.expand_path("../../../../input", File.dirname(__FILE__))
+  r = true
+
+  global_hash["sites"].each do |site_uid, site|
+    next if sites && !sites.include?(site_uid)
+    site_input_dir = File.expand_path("grid5000/sites/#{site_uid}", input_dir)
+    (Dir.entries(site_input_dir) - %w{. .. clusters networks servers pdus.yaml} - ["#{site_uid}.yaml"]).each do |f|
+      puts "ERROR: Unwanted file #{f} in #{site_input_dir}"
+      r = false
+    end
+
+    site.fetch("clusters", {}).each do |cluster_uid, cluster|
+      next if clusters && !clusters.empty? && !clusters.include?(cluster_uid)
+      cluster_input_dir = File.expand_path("clusters/#{cluster_uid}", site_input_dir)
+      # Whole-name match with literal dots and an escaped uid, so e.g. "<uid>_ibXyaml" is not accepted.
+      allowed = /\A#{Regexp.escape(cluster_uid)}(|_metrics|_pdus|_pdu|_retired|_ib|_extra)\.yaml(\.erb)?\z/
+      Dir.entries(cluster_input_dir).each do |f|
+        next if %w{. .. nodes pdus.yaml}.include?(f) || f =~ allowed
+        puts "ERROR: Unwanted file #{f} in #{cluster_input_dir}"
+        r = false
+      end
+
+      # Every non-retired node needs its own YAML file; a cluster without "nodes" is treated as having none.
+      (cluster["nodes"] || {}).each do |node_uid, node|
+        next if node['status'] == 'retired' || File.exist?(File.expand_path("nodes/#{node_uid}.yaml", cluster_input_dir))
+        puts "ERROR: Missing nodes/#{node_uid}.yaml in #{cluster_input_dir}"
+        r = false
+      end
+    end
+  end
+  unless r
+    puts "Missing or unwanted files detected. This is OK if you are in the early stage of a cluster integration, but must be fixed before merging to master."
+  end
+  return r
+end