From 74aab8a42c479cdeeda9371dbd591a19d070c48e Mon Sep 17 00:00:00 2001
From: Daniele Nicolodi
Date: Sat, 11 Jan 2025 17:13:14 +0100
Subject: docs: Add a test to validate URLs in markdown/Users.md

Avoid piling up dead URLs.
---
 docs/meson.build      |  4 +++-
 docs/validatelinks.py | 42 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 45 insertions(+), 1 deletion(-)
 create mode 100644 docs/validatelinks.py

(limited to 'docs')

diff --git a/docs/meson.build b/docs/meson.build
index 3ad12b7fd..0ce884062 100644
--- a/docs/meson.build
+++ b/docs/meson.build
@@ -1,7 +1,7 @@
 project('Meson documentation', version: '1.0')
 
 yaml_modname = get_option('unsafe_yaml') ? 'yaml' : 'strictyaml'
-py = import('python').find_installation('python3', modules: [yaml_modname], required: false)
+py = import('python').find_installation('python3', modules: [yaml_modname, 'aiohttp'], required: false)
 if not py.found()
   error(f'Cannot build documentation without yaml support')
 endif
@@ -145,3 +145,5 @@ run_target('upload',
   ],
   depends: documentation,
 )
+
+test('validate_links', find_program('./validatelinks.py'), args: meson.current_source_dir() / 'markdown' / 'Users.md')
diff --git a/docs/validatelinks.py b/docs/validatelinks.py
new file mode 100644
index 000000000..69544ab0e
--- /dev/null
+++ b/docs/validatelinks.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+
+# SPDX-License-Identifier: Apache-2.0
+# Copyright 2025 The Meson development team
+
+import sys
+import re
+import aiohttp
+import asyncio
+
+LINK = re.compile(r'\[(?P<name>[A-Za-z0-9 ]+)\]\((?P<url>.*?)\)')
+
+
+async def fetch(session, name, url, timeout):
+    try:
+        async with session.get(url, timeout=timeout) as r:
+            if not r.ok:
+                return (name, url, r.status)
+    except Exception as e:
+        return (name, url, str(e))
+
+
+async def main(filename):
+    with open(filename) as f:
+        text = f.read()
+    timeout = aiohttp.ClientTimeout(total=60)
+    async with aiohttp.ClientSession() as session:
+        tasks = []
+        for link in LINK.finditer(text):
+            name, url = link.groups()
+            task = asyncio.ensure_future(fetch(session, name, url, timeout))
+            tasks.append(task)
+        responses = asyncio.gather(*tasks)
+        errors = [r for r in await responses if r is not None]
+    for name, url, result in errors:
+        print(f'"{name}" {url} {result}')
+    if errors:
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+    asyncio.run(main(sys.argv[1]))
-- 
cgit v1.2.3