diff --git a/tools/validate-links.py b/tools/validate-links.py index 102340b9f62e931fd7d7e2ee91f3af2d4ee14279..b544e688db81c54a48b3e4b1c81a062ce43255af 100644 --- a/tools/validate-links.py +++ b/tools/validate-links.py @@ -7,7 +7,9 @@ from urllib.parse import * import re DEFAULT_FILES = glob.glob('../site/*/*.html') + REGEX_HREF = re.compile('href="(.*?)"') +REGEX_ID = re.compile('id="(.*?)"') STATUS = { None: '? ', @@ -15,10 +17,17 @@ STATUS = { True: 'ok' } -def check_url(url): +def check_url(url, ids=[]): + + # Internal links + if url.startswith('#'): + return (not url[1:]) or (url[1:] in ids) + + # Relative links: TODO if not url.startswith('http'): return None - + + # External http(s) links try: req = requests.get(url) return (req.status_code < 400) @@ -29,8 +38,11 @@ def check_url(url): def check_file(f): print('<-- ', f) content = ''.join(open(f).readlines()) + + ids = REGEX_ID.findall(content) + for url in REGEX_HREF.findall(content): - ok = check_url(url) + ok = check_url(url, ids) print(STATUS[ok] + ' ' + url) print()