diff options
Diffstat (limited to 'stats.py')
-rwxr-xr-x | stats.py | 94 |
1 files changed, 94 insertions, 0 deletions
#!/usr/bin/python3
"""CGI script that renders visit statistics from web-server access logs.

Every ``access.log*`` file (plain text or gzip-compressed) in the directory
named by the ``LOGS_DIR`` environment variable (default
``/var/log/lighttpd``) is scanned for GET requests.  The script then prints
a CGI response containing an HTML page with the total number of GET
requests, the number of distinct request sources and the most requested URL.
"""

import gzip
import os
import re
from collections import Counter
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import Iterable, Iterator, List, Optional

# One access-log line of a GET request.  Groups: 1 -> ``Get.source``,
# 2 -> ``Get.dest``, 3 -> bracketed timestamp (captured but not used),
# 4 -> requested URL.
GET_PATTERN = r"^([^ ]+) ([^ ]+) - \[(.*)\] \"GET ([^ ]*)"

# Compiled once; applied to every log line.
_GET_RE = re.compile(GET_PATTERN)

TEMPLATE = """
<!DOCTYPE html>

<html lang="en">

<head>
    <meta charset="utf-8">
    <title>Most visited pages</title>
</head>
<body>
    <h1>Stats</h1>
    <p>
        Logs start at {{ logs_start_date }} ({{ logs_days_ago }} days ago)
    </p>
    <h2>Most visited pages</h2>
    <p>
        Total visits: {{ total_visits }}
    </p>
    <p>
        Unique visits: {{ unique_visits }}
    </p>
    <p>
        Most visited url: {{ most_visited_site }}
    </p>
</body>

</html>
"""


@dataclass
class Get:
    """A single GET request extracted from an access-log line."""

    source: str  # first field of the line; distinct values count as unique visits
    dest: str  # second field of the line
    url: str  # token following ``"GET `` in the request line


def find_log_files(logs_dir: Path) -> List[Path]:
    """Return the regular files in *logs_dir* whose name starts with ``access.log``.

    The result is sorted so that processing order is deterministic
    (``Path.iterdir`` yields entries in arbitrary order).

    Raises:
        OSError: if *logs_dir* does not exist or cannot be listed.
    """
    return sorted(
        entry
        for entry in logs_dir.iterdir()
        if entry.is_file() and entry.name.startswith("access.log")
    )


def read_log_lines(path: Path) -> Iterator[str]:
    """Yield the lines of a log file, transparently decompressing ``.gz`` files.

    Log content is partly attacker-controlled (request lines), so undecodable
    bytes are replaced instead of aborting the whole page with a
    ``UnicodeDecodeError``.  The file handle is always closed.
    """
    opener = gzip.open if path.suffix == ".gz" else open
    with opener(path, "rt", encoding="utf-8", errors="replace") as handle:
        yield from handle


def parse_gets(lines: Iterable[str]) -> List[Get]:
    """Parse GET requests out of access-log *lines*.

    Each line is matched on its own, so a match can never span several log
    lines.  Lines that are not GET requests in the expected format are
    skipped.
    """
    matches = (_GET_RE.match(line.rstrip("\r\n")) for line in lines)
    return [
        Get(match.group(1), match.group(2), match.group(4))
        for match in matches
        if match
    ]


def most_visited_url(gets: Iterable[Get]) -> Optional[str]:
    """Return the URL requested most often, or ``None`` when *gets* is empty.

    When several URLs share the highest count, the one seen first wins.
    """
    ranking = Counter(get.url for get in gets).most_common(1)
    return ranking[0][0] if ranking else None


def summarize(gets: List[Get]) -> dict:
    """Compute the visit figures shown on the page.

    Returns a dict with ``total_visits`` (number of GET requests),
    ``unique_visits`` (number of distinct ``source`` values) and
    ``most_visited_site`` (most requested URL, or ``"n/a"`` when there are
    no requests at all).
    """
    top_url = most_visited_url(gets)
    return {
        "total_visits": len(gets),
        "unique_visits": len({get.source for get in gets}),
        "most_visited_site": "n/a" if top_url is None else top_url,
    }


def logs_start_date(log_files: Iterable[Path], default: datetime) -> datetime:
    """Return the oldest ``st_ctime`` among *log_files* as a local datetime.

    *default* is returned when there are no log files.
    NOTE(review): ``st_ctime`` is only an approximation of when logging
    started (on Unix it is the inode change time) -- the timestamp captured
    by ``GET_PATTERN`` group 3 would be more accurate; confirm before relying
    on this value.
    """
    timestamps = [path.stat().st_ctime for path in log_files]
    if not timestamps:
        return default
    return datetime.fromtimestamp(min(timestamps))


def render_page(**context) -> str:
    """Render ``TEMPLATE`` with *context* and return the HTML text."""
    # Imported here so the parsing helpers above remain importable on a
    # machine without Jinja2 installed.
    from jinja2 import Environment

    # Autoescaping is required: the most visited URL comes straight from
    # client request lines and must not be emitted as raw HTML.
    environment = Environment(autoescape=True)
    return environment.from_string(TEMPLATE).render(**context)


def main() -> None:
    """Collect the statistics and print the CGI response to stdout."""
    logs_dir = Path(os.environ.get("LOGS_DIR", "/var/log/lighttpd"))
    log_files = find_log_files(logs_dir)

    gets: List[Get] = []
    for path in log_files:
        gets.extend(parse_gets(read_log_lines(path)))

    now = datetime.now()
    start = logs_start_date(log_files, default=now)

    # Render before emitting the header so that a rendering failure does not
    # leave a half-written response behind.
    page = render_page(
        logs_start_date=start.ctime(),
        logs_days_ago=(now - start).days,
        **summarize(gets),
    )

    # print() appends "\n", which yields the blank line ending the headers.
    print("Content-Type: text/html\r\n")
    print(page)


if __name__ == "__main__":
    main()