--- /dev/null
+#!/usr/bin/python3
+
+import re, os, gzip
+from datetime import datetime
+from pathlib import Path
+from dataclasses import dataclass
+from jinja2 import Environment
+
+# Matches the start of one GET request line in an access log; applied with
+# re.MULTILINE so "^" anchors at every line of the file text.
+# Capture groups:
+#   1, 2 - the first two space-separated fields of the line
+#   3    - the text between "[" and "]" (the bracketed field)
+#   4    - the request target that follows "GET " (up to the next space)
+# Only lines whose third field is a literal "-" can match.
+GET_PATTERN = r"^([^ ]+) ([^ ]+) - \[(.*)\] \"GET ([^ ]*)"
+
+# Jinja2 source for the HTML statistics page. Placeholders filled in at render
+# time: logs_start_date, logs_days_ago, total_visits, unique_visits and
+# most_visited_site.
+TEMPLATE="""
+<!DOCTYPE html>
+
+<html lang="en">
+
+<head>
+ <meta charset="utf-8">
+ <title>Most visited pages</title>
+</head>
+<body>
+ <h1>Stats</h1>
+ <p>
+ Logs start at {{ logs_start_date }} ({{ logs_days_ago }} days ago)
+ </p>
+ <h2>Most visited pages</h2>
+ <p>
+ Total visits: {{ total_visits }}
+ </p>
+ <p>
+ Unique visits: {{ unique_visits }}
+ </p>
+ <p>
+ Most visited url: {{ most_visited_site }}
+ </p>
+</body>
+
+</html>
+"""
+
+@dataclass
+class Get:
+ """One GET request extracted from an access-log line via GET_PATTERN."""
+ # First space-separated field of the log line (regex group 1).
+ # NOTE(review): presumably the client address - confirm against the
+ # configured lighttpd accesslog format.
+ source: str
+ # Second space-separated field of the log line (regex group 2).
+ # NOTE(review): presumably the requested host - confirm.
+ dest: str
+ # Request target that followed "GET " on the line (regex group 4).
+ url: str
+
+# Directory that holds the access logs; override with the LOGS_DIR
+# environment variable.
+logs_dir = Path(os.environ.get("LOGS_DIR", "/var/log/lighttpd"))
+
+# Every regular file whose name starts with "access.log": the live log plus
+# any rotated (possibly gzip-compressed) copies.
+log_files = [
+    log for log in logs_dir.iterdir()
+    if log.is_file() and log.name.startswith("access.log")
+]
+
+# One Get record per GET request line found in any of the log files.
+gets = []
+
+for file in log_files:
+    # Decode explicitly as UTF-8 and replace undecodable bytes: request lines
+    # are client-controlled, and a single bad byte (or a non-UTF-8 locale in
+    # the CGI environment) must not abort the whole report.
+    if file.suffix == ".gz":
+        # The context manager closes the gzip handle; the previous code
+        # leaked it.
+        with gzip.open(file, "rt", encoding="utf-8", errors="replace") as decoder:
+            text = decoder.read()
+    else:
+        text = file.read_text(encoding="utf-8", errors="replace")
+
+    for match in re.finditer(GET_PATTERN, text, flags=re.MULTILINE):
+        # Group 3 (the bracketed field) is matched but intentionally not kept.
+        gets.append(Get(match.group(1), match.group(2), match.group(4)))
+
+# Number of GET requests seen per URL, keyed by the URL string.
+urls = {}
+
+for get in gets:
+    # A URL seen for the first time starts from zero.
+    urls[get.url] = urls.get(get.url, 0) + 1
+
+# Approximate the start of the logged period from the oldest log file.
+# iterdir() yields files in arbitrary order, so log_files[0] is not
+# necessarily the oldest one, and on Unix st_ctime is the last metadata change
+# (reset when a log is renamed during rotation), so pick the file with the
+# smallest modification time instead.
+# NOTE(review): mtime is the last write to that file, so this is still only an
+# approximation of when logging began.
+if log_files:
+    first_log_file = min(log_files, key=lambda log: log.stat().st_mtime)
+    logs_start_date = datetime.fromtimestamp(first_log_file.stat().st_mtime)
+else:
+    # No log files at all: report "now" instead of failing with IndexError.
+    first_log_file = None
+    logs_start_date = datetime.now()
+logs_days_ago = (datetime.now() - logs_start_date).days
+
+# Every matched GET line counts as one visit.
+total_visits = len(gets)
+# "Unique" means distinct values of the source field.
+unique_visits = len({get.source for get in gets})
+
+# Rank URLs by hit count, ascending; the last entry is the most visited.
+# Guard the empty case so a log without GET requests still renders a page.
+ranked_urls = sorted(urls.items(), key=lambda item: item[1])
+most_visited_site = ranked_urls[-1][0] if ranked_urls else "n/a"
+
+# Autoescaping is required here: most_visited_site comes verbatim from request
+# lines in the access log, i.e. it is client-controlled text, and would
+# otherwise be injected into the HTML page unescaped.
+environment = Environment(autoescape=True)
+template = environment.from_string(TEMPLATE)
+
+# CGI response: header line, then an empty line, then the body.
+print("Content-Type: text/html\r\n")
+
+print(template.render(
+    logs_start_date=logs_start_date.ctime(),
+    logs_days_ago=logs_days_ago,
+    total_visits=total_visits,
+    unique_visits=unique_visits,
+    most_visited_site=most_visited_site,
+))