···
1
+
#! /usr/bin/env nix-shell
2
+
#! nix-shell -i "python3 -I" -p "python3.withPackages(p: with p; [ aiohttp rich structlog ])"
4
+
from argparse import ArgumentParser
5
+
from collections import defaultdict
6
+
from enum import IntEnum
7
+
from http import HTTPStatus
8
+
from pathlib import Path
9
+
import asyncio, json, logging
11
+
import aiohttp, structlog
12
+
from structlog.contextvars import bound_contextvars as log_context
15
+
# Log levels selectable on the command line. Each member is named after, and
# carries the numeric value of, the `logging` module constant of the same name.
LogLevel = IntEnum('LogLevel', {
    lvl: getattr(logging, lvl)
    for lvl in ('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL')
})
# Render members by bare name ("INFO") instead of the default enum string.
# NOTE(review): presumably so that argparse displays the choices by name — confirm.
LogLevel.__str__ = lambda self: self.name
22
+
# Statuses that the final summary accounts for explicitly (as "correct" or
# "broken"); every other status is reported as "unexpected".
EXPECTED_STATUS = frozenset((
    HTTPStatus.OK, HTTPStatus.FOUND,
    HTTPStatus.NOT_FOUND,
))
27
+
async def check(session, manpage: str, url: str) -> HTTPStatus:
28
+
with log_context(manpage=manpage, url=url):
29
+
logger.debug("Checking")
30
+
async with session.head(url) as resp:
31
+
st = HTTPStatus(resp.status)
33
+
case HTTPStatus.OK | HTTPStatus.FOUND:
35
+
case HTTPStatus.NOT_FOUND:
36
+
logger.error("Broken link!")
38
+
logger.info("Unexpected code", status=st)
39
+
case _ if 400 <= st < 600:
40
+
logger.warn("Unexpected error", status=st)
44
+
async def main(urls_path):
    """Check every URL listed in the JSON file at *urls_path*.

    Args:
        urls_path: Object with an ``open()`` method (the caller passes a
            ``pathlib.Path``) naming a JSON object that maps manpage names
            to URLs.

    Returns:
        A mapping from response status to the number of URLs that produced
        it. Statuses that never occurred read as 0.
    """
    logger.info(f"Parsing {urls_path}")
    with urls_path.open() as urls_file:
        urls = json.load(urls_file)

    count = defaultdict(int)

    logger.info(f"Checking URLs from {urls_path}")
    async with aiohttp.ClientSession() as session:
        # All checks are started together and tallied in completion order.
        checks = [
            check(session, manpage, url)
            for manpage, url in urls.items()
        ]
        for pending in asyncio.as_completed(checks):
            count[await pending] += 1

    ok = count[HTTPStatus.OK] + count[HTTPStatus.FOUND]
    broken = count[HTTPStatus.NOT_FOUND]
    unknown = sum(c for st, c in count.items() if st not in EXPECTED_STATUS)
    logger.info(f"Done: {broken} broken links, "
                f"{ok} correct links, and {unknown} unexpected status")

    # The script entry point indexes this tally to derive the exit code.
    return count
68
+
def parse_args(args=None):
    """Parse the command line of the manpage URL checker.

    Args:
        args: Argument list to parse; ``None`` lets argparse read
            ``sys.argv[1:]``.

    Returns:
        The ``argparse.Namespace`` with ``log_level`` (a ``LogLevel`` member,
        defaulting to the ``LOG_LEVEL`` environment variable or ``INFO``) and
        ``file``.
    """
    # Imported locally: neither name appears among the module-level imports
    # visible in this file, and `os.getenv` is needed for the default below.
    import os
    from argparse import ArgumentTypeError

    def log_level(name):
        """Map a level name to its LogLevel member for argparse."""
        try:
            return LogLevel[name]
        except KeyError:
            # argparse reports ArgumentTypeError as a usage error; a bare
            # KeyError would escape as a traceback.
            raise ArgumentTypeError(f"unknown log level {name!r}") from None

    parser = ArgumentParser(
        prog = 'check-manpage-urls',
        description = 'Check the validity of the manpage URLs linked in the nixpkgs manual',
    )
    parser.add_argument(
        '-l', '--log-level',
        default = os.getenv('LOG_LEVEL', 'INFO'),
        type = log_level,
        choices = list(LogLevel),
    )
    # NOTE(review): the body of this call is missing from the reviewed extract.
    # It is reconstructed from how the script uses `args.file` (may be None,
    # otherwise used as a Path) — confirm name, type and arity.
    parser.add_argument(
        'file',
        type = Path,
        nargs = '?',
    )

    return parser.parse_args(args)
88
+
if __name__ == "__main__":
    # Script entry point: parse options, configure logging, run the checks and
    # exit non-zero when at least one link answered 404.
    #
    # NOTE(review): `import sys` and `args = parse_args()` are not visible in
    # the reviewed extract; both are required by the statements below.
    import sys

    args = parse_args()

    # Drop log events below the requested level.
    structlog.configure(
        wrapper_class=structlog.make_filtering_bound_logger(args.log_level),
    )
    # Module-level logger used by check() and main().
    logger = structlog.getLogger("check-manpage-urls.py")

    urls_path = args.file
    if urls_path is None:
        # No file given: fall back to doc/manpage-urls.json under the
        # directory four levels above this script.
        REPO_ROOT = Path(__file__).parent.parent.parent.parent
        logger.info(f"Assuming we are in a nixpkgs repo rooted at {REPO_ROOT}")

        urls_path = REPO_ROOT / 'doc' / 'manpage-urls.json'

    count = asyncio.run(main(urls_path))

    sys.exit(0 if count[HTTPStatus.NOT_FOUND] == 0 else 1)