From: John Turner Date: Thu, 17 Jul 2025 19:59:54 +0000 (-0400) Subject: stats.py: only count visits that were successful X-Git-Url: https://jturnerusa.dev/gitweb/?a=commitdiff_plain;h=c117573bbd78bbb13ffc252f6ea776f295b2112e;p=website stats.py: only count visits that were successful --- diff --git a/stats.py b/stats.py index 824bfa4..448beca 100755 --- a/stats.py +++ b/stats.py @@ -1,15 +1,20 @@ #!/usr/bin/python3 -import re, os, gzip +import re +import os +import gzip from datetime import datetime from pathlib import Path from dataclasses import dataclass from jinja2 import Environment from time import time -LINE_PATTERN = r"^([^ ]+) ([^ ]+) - \[(.*)\] \"([A-Z]+) ([^ ]+) HTTP/\d\.\d\"" +LINE_PATTERN = ( + r"^([^ ]+) ([^ ]+) - \[(.*)\] \"([A-Z]+) ([^ ]+) (HTTP/\d\.\d)\"" + r" ([0-9]+) ([0-9]+) \"-\" \"([^\"]+)\"" +) -TEMPLATE=""" +TEMPLATE = """ @@ -47,6 +52,7 @@ TEMPLATE=""" """ + @dataclass class Line: source: str @@ -54,17 +60,24 @@ class Line: time: str kind: str resource: str + version: str + code: int + size: int + agent: str + start_time = time() logs_dir = Path(os.environ.get("LOGS_DIR", "/var/log/lighttpd")) -log_files = [log for log in logs_dir.iterdir() - if log.is_file() - and log.name.startswith("access.log")] +log_files = [ + log + for log in logs_dir.iterdir() + if log.is_file() and log.name.startswith("access.log") +] -urls = {} -seen = set() +urls: dict[str, int] = {} +seen: set[str] = set() total_visits = 0 for file in log_files: @@ -74,25 +87,28 @@ for file in log_files: reader = file.open() for line in reader: - if (match := re.match(LINE_PATTERN, line)): - line = Line( + if match := re.match(LINE_PATTERN, line): + entry = Line( match.group(1), match.group(2), match.group(3), match.group(4), - match.group(5) + match.group(5), + match.group(6), + int(match.group(7)), + int(match.group(8)), + match.group(9), ) - if line.kind == "GET": + if entry.kind == "GET" and entry.code == 200: try: - count = urls[line.resource] + count = urls[entry.resource] 
except KeyError: count = 0 - - urls[line.resource] = count + 1 + + urls[entry.resource] = count + 1 total_visits += 1 - seen.add(line.source) - + seen.add(entry.source) first_log_file = log_files[0] logs_start_date = datetime.fromtimestamp(first_log_file.stat().st_ctime) @@ -104,11 +120,13 @@ seconds = round(time() - start_time, 3) environment = Environment() template = environment.from_string(TEMPLATE) -print(template.render( - seconds=seconds, - logs_start_date=logs_start_date.ctime(), - logs_days_ago=logs_days_ago, - total_visits=total_visits, - unique_visits=unique_visits, - most_visited_site=most_visited_site -)) +print( + template.render( + seconds=seconds, + logs_start_date=logs_start_date.ctime(), + logs_days_ago=logs_days_ago, + total_visits=total_visits, + unique_visits=unique_visits, + most_visited_site=most_visited_site, + ) +)