Mentions légales du service

Skip to content
Snippets Groups Projects
Verified Commit c257cc4b authored by Ludovic Courtès's avatar Ludovic Courtès Committed by Romain Garbage
Browse files

nginx: Return 403 for user agents corresponding to known AI robots.

* nginx-config/nginx.conf: Include @ROBOT-EXCLUSION@.
* head-node.scm (ai.robots.txt, robot-exclusion-nginx-file): New
variables.
(%nginx-config): Substitute “@ROBOT-EXCLUSION@” in the config file.
parent 4da59955
No related branches found
No related tags found
No related merge requests found
......@@ -7,6 +7,7 @@
(use-modules (gnu)
((guix store) #:select (%store-prefix))
(guix git-download)
(guix packages)
(guix modules)
((guix utils) #:select (substitute-keyword-arguments))
......@@ -349,20 +350,80 @@ CALENDAR, a gexp, and ensures at least FREE-SPACE GiB are available."
;;; NGINX.
;;;
(define ai.robots.txt
  ;; Pinned checkout of the 'ai.robots.txt' repository, which maintains a
  ;; 'robots.txt' listing the user-agent strings of known AI crawlers.
  (let ((revision "5e7c3c432f8bad894363c7289b888328f98963f3"))
    (origin
      (method git-fetch)
      (uri (git-reference
            (url "https://github.com/ai-robots-txt/ai.robots.txt")
            (commit revision)))
      ;; Use the abbreviated commit hash in the store file name.
      (file-name (string-append "ai.robots.txt-"
                                (string-take revision 7)
                                "-checkout"))
      (sha256
       (base32
        "14yblgpnnndzphfi8d6hlc44j7daz0w7x1p55if45jpsz34z1czc")))))
(define robot-exclusion-nginx-file
  ;; Return an nginx configuration file that can be included in the main file
  ;; to return 403 when the user-agent string matches a known AI robot that
  ;; does not respect 'robots.txt'.
  ;;
  ;; The file is built at derivation-build time: the build-side code below
  ;; parses the 'robots.txt' from AI.ROBOTS.TXT and emits an nginx 'if'
  ;; block matching any of the listed user agents.
  (computed-file "robot-exclusion.nginx.conf"
                 (with-imported-modules '((guix build utils))
                   #~(begin
                       (use-modules (guix build utils)
                                    (ice-9 match)
                                    (ice-9 rdelim)
                                    (ice-9 regex))

                       ;; Return the list of user-agent strings found in
                       ;; 'robots.txt', in file order: every line of the
                       ;; form "User-agent: NAME", with the prefix dropped.
                       (define (robot-user-agents)
                         (define prefix
                           "User-agent: ")

                         (call-with-input-file #$(file-append ai.robots.txt
                                                              "/robots.txt")
                           (lambda (port)
                             (let loop ((user-agents '()))
                               (match (read-line port)
                                 ((? eof-object?)
                                  ;; Entries were accumulated in reverse.
                                  (reverse user-agents))
                                 (line
                                  (if (string-prefix? prefix line)
                                      (loop (cons (string-drop line
                                                               (string-length
                                                                prefix))
                                                  user-agents))
                                      (loop user-agents))))))))

                       (call-with-output-file #$output
                         (lambda (port)
                           ;; Note: "~~" in the format string emits a literal
                           ;; "~", so nginx sees a case-sensitive regex match:
                           ;;   if ($http_user_agent ~ "(A|B|...)") { ... }
                           (format port "\
# Automatically generated from 'ai.robots.txt'.
if ($http_user_agent ~~ \"(~a)\" ) {
    return 403;
    break;
}\n"
                                   ;; 'regexp-quote' escapes regex special
                                   ;; characters in each user-agent string so
                                   ;; they match literally.
                                   (string-join (map regexp-quote
                                                     (robot-user-agents))
                                                "|"))))))))
(define %nginx-config
  ;; Directory containing the nginx configuration: "nginx.conf" with the
  ;; "@ROBOT-EXCLUSION@" placeholder replaced by the generated robot
  ;; exclusion snippet, alongside "nginx-locations.conf".
  ;;
  ;; Note: the scraped diff had concatenated the pre-commit body (which
  ;; symlinked "nginx.conf") with the post-commit body, yielding a broken
  ;; definition; this is the coherent post-commit version.
  (computed-file "nginx-config"
                 (with-imported-modules
                  '((guix build utils))
                  #~(begin
                      (use-modules (guix build utils))

                      (mkdir #$output)
                      (chdir #$output)

                      ;; Copy rather than symlink "nginx.conf" so that
                      ;; 'substitute*' can rewrite it in place.
                      (copy-file #$(local-file "nginx-config/nginx.conf")
                                 "nginx.conf")
                      (substitute* "nginx.conf"
                        (("@ROBOT-EXCLUSION@")
                         #$robot-exclusion-nginx-file))
                      (copy-file #$(local-file
                                    "nginx-config/nginx-locations.conf")
                                 "nginx-locations.conf")))))
(define %nginx-gitlab-token
;; Create /etc/nginx-tokens with a random token if it doesn't exist.
......
......@@ -45,6 +45,9 @@ http {
add_header X-Frame-Options SAMEORIGIN;
# Return 403 when the user-agent string is that of an AI robot.
include @ROBOT-EXCLUSION@;
include nginx-locations.conf;
}
......@@ -74,6 +77,9 @@ http {
add_header Strict-Transport-Security max-age=15552000;
add_header X-Frame-Options SAMEORIGIN;
# Return 403 when the user-agent string is that of an AI robot.
include @ROBOT-EXCLUSION@;
include nginx-locations.conf;
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment