From b0f13b52437cffde4211bf8b3fcd7451995355c7 Mon Sep 17 00:00:00 2001 From: John Turner Date: Tue, 15 Jul 2025 20:29:40 -0400 Subject: [PATCH] add stats.py --- stats.py | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100755 stats.py diff --git a/stats.py b/stats.py new file mode 100755 index 0000000..012765f --- /dev/null +++ b/stats.py @@ -0,0 +1,94 @@ +#!/usr/bin/python3 + +import re, os, gzip +from datetime import datetime +from pathlib import Path +from dataclasses import dataclass +from jinja2 import Environment + +GET_PATTERN = r"^([^ ]+) ([^ ]+) - \[(.*)\] \"GET ([^ ]*)" + +TEMPLATE=""" + + + + + + + Most visited pages + + +

Stats

+

+ Logs start at {{ logs_start_date }} ({{ logs_days_ago }} days ago) +

+

Most visited pages

+

+ Total visits: {{ total_visits }} +

+

+ Unique visits: {{ unique_visits }} +

+

+ Most visited url: {{ most_visited_site }} +

from collections import Counter


@dataclass
class Get:
    """One GET request matched by ``GET_PATTERN`` in an access log."""

    source: str  # first space-delimited field of the line (presumably the client address)
    dest: str    # second space-delimited field (presumably the requested host)
    url: str     # token that follows ``"GET `` in the request line


def read_log_text(path):
    """Return the full contents of one access log as text.

    Files whose suffix is ``.gz`` are decompressed; everything else is read
    as-is. Both kinds are decoded as UTF-8 with undecodable bytes replaced,
    because request lines are written by remote clients and a single bad byte
    must not abort the whole report.
    """
    if path.suffix == ".gz":
        # The context manager guarantees the gzip handle is closed.
        with gzip.open(path, "rt", encoding="utf-8", errors="replace") as handle:
            return handle.read()
    return path.read_text(encoding="utf-8", errors="replace")


def parse_gets(text, pattern=None):
    """Return a list of ``Get`` records for every GET line found in *text*.

    *pattern* defaults to the module-level ``GET_PATTERN``; it is matched in
    multi-line mode so ``^`` anchors at the start of every log line. Groups
    1, 2 and 4 become ``source``, ``dest`` and ``url``; group 3 (the bracketed
    timestamp) is not used.
    """
    if pattern is None:
        pattern = GET_PATTERN
    return [
        Get(match.group(1), match.group(2), match.group(4))
        for match in re.finditer(pattern, text, flags=re.MULTILINE)
    ]


def summarize(gets):
    """Return ``(total_visits, unique_visits, most_visited_url)`` for *gets*.

    ``unique_visits`` counts distinct ``source`` values. ``most_visited_url``
    is ``None`` when *gets* is empty; ties go to the URL seen first.
    """
    url_counts = Counter(get.url for get in gets)
    most_visited = url_counts.most_common(1)[0][0] if url_counts else None
    unique_sources = {get.source for get in gets}
    return len(gets), len(unique_sources), most_visited


def logs_start(log_files, now=None):
    """Return ``(start_datetime, days_ago)`` for the given log files.

    The start is the smallest ``st_ctime`` among *log_files*, so the result
    does not depend on directory listing order. With no log files the start
    is *now* and the age is 0 days. *now* defaults to ``datetime.now()``.
    """
    if now is None:
        now = datetime.now()
    if not log_files:
        return now, 0
    # NOTE(review): st_ctime is the inode-change time on Unix, so this is only
    # an approximation of when logging began - confirm it is good enough.
    oldest = min(path.stat().st_ctime for path in log_files)
    start = datetime.fromtimestamp(oldest)
    return start, (now - start).days


def main():
    """Collect statistics from the access logs and print them as a CGI page."""
    logs_dir = Path(os.environ.get("LOGS_DIR", "/var/log/lighttpd"))
    log_files = [
        entry for entry in logs_dir.iterdir()
        if entry.is_file() and entry.name.startswith("access.log")
    ]

    gets = []
    for path in log_files:
        gets.extend(parse_gets(read_log_text(path)))

    logs_start_date, logs_days_ago = logs_start(log_files)
    total_visits, unique_visits, most_visited_site = summarize(gets)

    # The URL is copied verbatim from request lines, i.e. it is chosen by
    # whoever sent the request; autoescaping keeps it from injecting markup.
    environment = Environment(autoescape=True)
    template = environment.from_string(TEMPLATE)

    print("Content-Type: text/html\r\n")

    print(template.render(
        logs_start_date=logs_start_date.ctime(),
        logs_days_ago=logs_days_ago,
        total_visits=total_visits,
        unique_visits=unique_visits,
        most_visited_site="n/a" if most_visited_site is None else most_visited_site,
    ))


if __name__ == "__main__":
    main()