Commit 00dea500 authored by Mathieu Giraud

tools/validate-links.py: check also internal links

See #3364.
parent 26082ca7
......@@ -7,7 +7,9 @@ from urllib.parse import *
import re
DEFAULT_FILES = glob.glob('../site/*/*.html')
REGEX_HREF = re.compile('href="(.*?)"')
REGEX_ID = re.compile('id="(.*?)"')
STATUS = {
None: '? ',
......@@ -15,10 +17,17 @@ STATUS = {
True: 'ok'
}
def check_url(url):
def check_url(url, ids=[]):
# Internal links
if url.startswith('#'):
return (not url[1:]) or (url[1:] in ids)
# Relative links: TODO
if not url.startswith('http'):
return None
# External http(s) links
try:
req = requests.get(url)
return (req.status_code < 400)
......@@ -29,8 +38,11 @@ def check_url(url):
def check_file(f):
    """Check every link found in the file `f` and print one status line per link.

    The file content is scanned with REGEX_ID to collect the values of all
    `id="..."` attributes, then with REGEX_HREF to collect the values of all
    `href="..."` attributes. Each href is passed to `check_url` together with
    the collected ids, and the result is printed as a STATUS prefix followed
    by the url.

    Args:
        f: path of the file to check.
    """
    print('<-- ', f)

    # Read the whole file at once and close the handle deterministically.
    with open(f) as html:
        content = html.read()

    # Anchors declared in this file; check_url uses them for '#...' links.
    ids = REGEX_ID.findall(content)

    for url in REGEX_HREF.findall(content):
        # Each link is checked exactly once, with the anchors of this file.
        ok = check_url(url, ids)
        print(STATUS[ok] + ' ' + url)

    # Blank line after the report for this file.
    print()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment