#!/usr/bin/python3
"""Print a short visit report built from lighttpd access logs.

Reads every ``access.log*`` file (plain or gzip-compressed) found directly in
``$LOGS_DIR`` (default ``/var/log/lighttpd``), counts the ``GET`` requests
answered with status 200 and prints the totals rendered through ``TEMPLATE``.
"""

import gzip
import os
import re
from collections import Counter
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from time import time
from typing import IO, Iterable, Optional

# NOTE(review): the pattern only matches entries whose second-to-last quoted
# field (presumably the referer) is literally "-"; every other entry is
# skipped.  Confirm this is intended.
LINE_PATTERN = (
    r"^([^ ]+) ([^ ]+) - \[(.*)\] \"([A-Z]+) ([^ ]+) (HTTP/\d\.\d)\""
    r" ([0-9]+) ([0-9]+) \"-\" \"([^\"]+)\""
)
# Compiled once instead of handing the pattern string to re.match per line.
LINE_RE = re.compile(LINE_PATTERN)

TEMPLATE = """
Most visited pages

Stats

Logs start at {{ logs_start_date }} ({{ logs_days_ago }} days ago)

Stats generated in {{ seconds }}s

Total visits: {{ total_visits }}

Unique visits: {{ unique_visits }}

Most visited url: {{ most_visited_site }}

"""

# Shown as "most visited url" when no request matched at all.
NO_DATA = "n/a"


@dataclass
class Line:
    """One access-log entry, fields in the order LINE_PATTERN captures them."""

    source: str
    dest: str
    time: str
    kind: str
    resource: str
    version: str
    code: int
    size: int
    agent: str


def parse_line(line: str) -> Optional[Line]:
    """Return the parsed entry for *line*, or None if it does not match."""
    match = LINE_RE.match(line)
    if match is None:
        return None
    source, dest, when, kind, resource, version, code, size, agent = match.groups()
    return Line(
        source, dest, when, kind, resource, version, int(code), int(size), agent
    )


def find_log_files(logs_dir: Path) -> list[Path]:
    """Return the regular files in *logs_dir* whose name starts with access.log."""
    return [
        log
        for log in logs_dir.iterdir()
        if log.is_file() and log.name.startswith("access.log")
    ]


def open_log(path: Path) -> IO[str]:
    """Open *path* for reading text, transparently decompressing ``.gz`` files.

    Undecodable bytes are replaced rather than raised, so a single malformed
    request line cannot abort the whole report.
    """
    if path.suffix == ".gz":
        return gzip.open(path, "rt", encoding="utf-8", errors="replace")
    return path.open(encoding="utf-8", errors="replace")


def collect_stats(log_files: Iterable[Path]) -> tuple[Counter, set[str], int]:
    """Aggregate the successful GET requests found in *log_files*.

    Returns:
        A ``(urls, seen, total_visits)`` tuple: hits per requested resource,
        the set of distinct client addresses, and the number of counted
        requests.
    """
    urls: Counter = Counter()
    seen: set[str] = set()
    total_visits = 0
    for file in log_files:
        # The context manager closes each handle as soon as the file is read.
        with open_log(file) as reader:
            for line in reader:
                entry = parse_line(line)
                if entry is None or entry.kind != "GET" or entry.code != 200:
                    continue
                urls[entry.resource] += 1
                total_visits += 1
                seen.add(entry.source)
    return urls, seen, total_visits


def logs_start(log_files: Iterable[Path]) -> datetime:
    """Return the earliest ``st_ctime`` among *log_files* as a local datetime.

    The minimum is taken over all files because directory listing order is
    arbitrary, so the first listed file need not be the oldest.

    Raises:
        ValueError: if *log_files* is empty.
    """
    return datetime.fromtimestamp(min(log.stat().st_ctime for log in log_files))


def most_visited(urls: dict[str, int]) -> str:
    """Return the resource with the highest count, or NO_DATA if *urls* is empty."""
    # Reversed iteration makes ties resolve to the last-inserted resource,
    # the same pick as taking the tail of a stable ascending sort.
    return max(reversed(urls), key=urls.get, default=NO_DATA)


def render_report(
    *,
    seconds: float,
    logs_start_date: str,
    logs_days_ago: int,
    total_visits: int,
    unique_visits: int,
    most_visited_site: str,
) -> str:
    """Render TEMPLATE with the given values and return the resulting text."""
    # Imported here so the parsing helpers above stay importable with only
    # the standard library available.
    from jinja2 import Environment

    return Environment().from_string(TEMPLATE).render(
        seconds=seconds,
        logs_start_date=logs_start_date,
        logs_days_ago=logs_days_ago,
        total_visits=total_visits,
        unique_visits=unique_visits,
        most_visited_site=most_visited_site,
    )


def main() -> None:
    """Collect the statistics from $LOGS_DIR and print the rendered report.

    Raises:
        SystemExit: if the directory contains no ``access.log*`` file.
    """
    start_time = time()
    logs_dir = Path(os.environ.get("LOGS_DIR", "/var/log/lighttpd"))
    log_files = find_log_files(logs_dir)
    if not log_files:
        raise SystemExit(f"no access.log* files found in {logs_dir}")

    urls, seen, total_visits = collect_stats(log_files)
    logs_start_date = logs_start(log_files)
    logs_days_ago = (datetime.now() - logs_start_date).days
    seconds = round(time() - start_time, 3)

    print(
        render_report(
            seconds=seconds,
            logs_start_date=logs_start_date.ctime(),
            logs_days_ago=logs_days_ago,
            total_visits=total_visits,
            unique_visits=len(seen),
            most_visited_site=most_visited(urls),
        )
    )


if __name__ == "__main__":
    main()