diff options
author | John Turner <jturner.usa@gmail.com> | 2025-07-17 15:59:54 -0400 |
---|---|---|
committer | John Turner <jturner.usa@gmail.com> | 2025-07-17 16:06:21 -0400 |
commit | c117573bbd78bbb13ffc252f6ea776f295b2112e (patch) | |
tree | 6a32463ba935b62c538bebf2982ebf2178d77c42 | |
parent | e96955096fce80def6d24d327f4e8be6f0c11e7a (diff) | |
download | website-c117573bbd78bbb13ffc252f6ea776f295b2112e.tar.gz |
stats.py: only count visits that were successful
-rwxr-xr-x | stats.py | 68 |
1 files changed, 43 insertions, 25 deletions
@@ -1,15 +1,20 @@ #!/usr/bin/python3 -import re, os, gzip +import re +import os +import gzip from datetime import datetime from pathlib import Path from dataclasses import dataclass from jinja2 import Environment from time import time -LINE_PATTERN = r"^([^ ]+) ([^ ]+) - \[(.*)\] \"([A-Z]+) ([^ ]+) HTTP/\d\.\d\"" +LINE_PATTERN = ( + r"^([^ ]+) ([^ ]+) - \[(.*)\] \"([A-Z]+) ([^ ]+) (HTTP/\d\.\d)\"" + r" ([0-9]+) ([0-9]+) \"-\" \"([^\"]+)\"" +) -TEMPLATE=""" +TEMPLATE = """ <!DOCTYPE html> <html lang="en"> @@ -47,6 +52,7 @@ TEMPLATE=""" </html> """ + @dataclass class Line: source: str @@ -54,17 +60,24 @@ class Line: time: str kind: str resource: str + version: str + code: int + size: int + agent: str + start_time = time() logs_dir = Path(os.environ.get("LOGS_DIR", "/var/log/lighttpd")) -log_files = [log for log in logs_dir.iterdir() - if log.is_file() - and log.name.startswith("access.log")] +log_files = [ + log + for log in logs_dir.iterdir() + if log.is_file() and log.name.startswith("access.log") +] -urls = {} -seen = set() +urls: dict[str, int] = {} +seen: set[str] = set() total_visits = 0 for file in log_files: @@ -74,25 +87,28 @@ for file in log_files: reader = file.open() for line in reader: - if (match := re.match(LINE_PATTERN, line)): - line = Line( + if match := re.match(LINE_PATTERN, line): + entry = Line( match.group(1), match.group(2), match.group(3), match.group(4), - match.group(5) + match.group(5), + match.group(6), + int(match.group(7)), + int(match.group(8)), + match.group(9), ) - if line.kind == "GET": + if entry.kind == "GET" and entry.code == 200: try: - count = urls[line.resource] + count = urls[entry.resource] except KeyError: count = 0 - - urls[line.resource] = count + 1 + + urls[entry.resource] = count + 1 total_visits += 1 - seen.add(line.source) - + seen.add(entry.source) first_log_file = log_files[0] logs_start_date = datetime.fromtimestamp(first_log_file.stat().st_ctime) @@ -104,11 +120,13 @@ seconds = round(time() - start_time, 3) 
environment = Environment() template = environment.from_string(TEMPLATE) -print(template.render( - seconds=seconds, - logs_start_date=logs_start_date.ctime(), - logs_days_ago=logs_days_ago, - total_visits=total_visits, - unique_visits=unique_visits, - most_visited_site=most_visited_site -)) +print( + template.render( + seconds=seconds, + logs_start_date=logs_start_date.ctime(), + logs_days_ago=logs_days_ago, + total_visits=total_visits, + unique_visits=unique_visits, + most_visited_site=most_visited_site, + ) +) |