diff options
Diffstat (limited to 'stats.py')
-rwxr-xr-x | stats.py | 129 |
1 files changed, 0 insertions, 129 deletions
#!/usr/bin/python3
"""Render a small HTML page of visit statistics from lighttpd access logs.

The script scans ``$LOGS_DIR`` (default ``/var/log/lighttpd``) for files whose
name starts with ``access.log`` (plain or ``.gz``), counts every successful
``GET`` request (status 200) and prints an HTML report to standard output.
"""

import re
import os
import gzip
from collections import Counter
from collections.abc import Iterable
from datetime import datetime
from pathlib import Path
from dataclasses import dataclass
from typing import Optional, TextIO
from jinja2 import Environment
from time import time

LINE_PATTERN = (
    r"^([^ ]+) ([^ ]+) - \[(.*)\] \"([A-Z]+) ([^ ]+) (HTTP/\d\.\d)\""
    r" ([0-9]+) ([0-9]+) \"-\" \"([^\"]+)\""
)

# Compiled once so the per-line loop does not go through the re cache lookup.
_LINE_RE = re.compile(LINE_PATTERN)

DEFAULT_LOGS_DIR = "/var/log/lighttpd"

# Shown as "most visited url" when no request qualified as a visit.
NO_VISITS_PLACEHOLDER = "n/a"

TEMPLATE = """
<!DOCTYPE html>

<html lang="en">

  <head>
    <meta charset="utf-8">
    <title>Most visited pages</title>
    <style type="text/css">
      .stats {
        text-align: center
      }
    </style>
  </head>

  <body>
    <div class="stats">
      <h1>Stats</h1>
      <p>
        Stats generated in {{ seconds }}s
      </p>
      <p>
        Total visits: {{ total_visits }}
      </p>
      <p>
        Unique visits: {{ unique_visits }}
      </p>
      <p>
        Most visited url: {{ most_visited_site }}
      </p>
    </div>
  </body>
</html>
"""


@dataclass
class Line:
    """One access-log entry, split into the groups of ``LINE_PATTERN``."""

    source: str
    dest: str
    time: str
    kind: str
    resource: str
    version: str
    code: int
    size: int
    agent: str


def _parse_line(raw: str) -> Optional[Line]:
    """Return the parsed entry for *raw*, or ``None`` if it does not match."""
    match = _LINE_RE.match(raw)
    if match is None:
        return None
    source, dest, when, kind, resource, version, code, size, agent = match.groups()
    return Line(source, dest, when, kind, resource, version, int(code), int(size), agent)


def _find_log_files(logs_dir: Path) -> list[Path]:
    """Return the regular files in *logs_dir* named ``access.log*``."""
    return [
        log
        for log in logs_dir.iterdir()
        if log.is_file() and log.name.startswith("access.log")
    ]


def _open_log(path: Path) -> TextIO:
    """Open a plain or gzip-compressed log file for text reading.

    Access logs carry client-controlled bytes, so undecodable sequences are
    replaced instead of aborting the whole report.
    """
    if path.suffix == ".gz":
        return gzip.open(path, "rt", encoding="utf-8", errors="replace")
    return path.open(encoding="utf-8", errors="replace")


def _count_visits(log_files: Iterable[Path]) -> tuple[Counter, set[str]]:
    """Count successful GET requests per resource and collect their sources.

    Returns a ``(hits_per_resource, distinct_sources)`` pair. Only entries
    with method ``GET`` and status code 200 are counted.
    """
    urls: Counter = Counter()
    seen: set[str] = set()
    for path in log_files:
        # The context manager closes each file as soon as it has been read.
        with _open_log(path) as reader:
            for raw in reader:
                entry = _parse_line(raw)
                if entry is None:
                    continue
                if entry.kind == "GET" and entry.code == 200:
                    urls[entry.resource] += 1
                    seen.add(entry.source)
    return urls, seen


def _logs_start_date(log_files: list[Path]) -> datetime:
    """Return the earliest ``st_ctime`` among *log_files* (now if there are none).

    ``Path.iterdir`` yields entries in arbitrary order, so the oldest file is
    selected explicitly rather than by list position.
    """
    if not log_files:
        return datetime.now()
    return datetime.fromtimestamp(min(log.stat().st_ctime for log in log_files))


def main() -> None:
    """Collect the statistics and print the rendered HTML report."""
    start_time = time()

    logs_dir = Path(os.environ.get("LOGS_DIR", DEFAULT_LOGS_DIR))
    log_files = _find_log_files(logs_dir)
    urls, seen = _count_visits(log_files)

    logs_start_date = _logs_start_date(log_files)
    logs_days_ago = (datetime.now() - logs_start_date).days
    total_visits = sum(urls.values())
    unique_visits = len(seen)
    top = urls.most_common(1)
    most_visited_site = top[0][0] if top else NO_VISITS_PLACEHOLDER
    seconds = round(time() - start_time, 3)

    # Request paths come straight from the access log, i.e. from untrusted
    # clients, so they must be HTML-escaped when rendered.
    environment = Environment(autoescape=True)
    template = environment.from_string(TEMPLATE)

    print(
        template.render(
            seconds=seconds,
            logs_start_date=logs_start_date.ctime(),
            logs_days_ago=logs_days_ago,
            total_visits=total_visits,
            unique_visits=unique_visits,
            most_visited_site=most_visited_site,
        )
    )


if __name__ == "__main__":
    main()