#!/usr/bin/python3
-import re, os, gzip
+import re
+import os
+import gzip
from datetime import datetime
from pathlib import Path
from dataclasses import dataclass
from jinja2 import Environment
from time import time
-LINE_PATTERN = r"^([^ ]+) ([^ ]+) - \[(.*)\] \"([A-Z]+) ([^ ]+) HTTP/\d\.\d\""
+LINE_PATTERN = (
+ r"^([^ ]+) ([^ ]+) - \[(.*)\] \"([A-Z]+) ([^ ]+) (HTTP/\d\.\d)\""
+ r" ([0-9]+) ([0-9]+) \"-\" \"([^\"]+)\""
+)
-TEMPLATE="""
+TEMPLATE = """
<!DOCTYPE html>
<html lang="en">
</html>
"""
+
@dataclass
class Line:
source: str
time: str
kind: str
resource: str
+ version: str
+ code: int
+ size: int
+ agent: str
+
start_time = time()
logs_dir = Path(os.environ.get("LOGS_DIR", "/var/log/lighttpd"))
-log_files = [log for log in logs_dir.iterdir()
- if log.is_file()
- and log.name.startswith("access.log")]
+log_files = [
+ log
+ for log in logs_dir.iterdir()
+ if log.is_file() and log.name.startswith("access.log")
+]
-urls = {}
-seen = set()
+urls: dict[str, int] = {}
+seen: set[str] = set()
total_visits = 0
for file in log_files:
reader = file.open()
for line in reader:
- if (match := re.match(LINE_PATTERN, line)):
- line = Line(
+ if match := re.match(LINE_PATTERN, line):
+ entry = Line(
match.group(1),
match.group(2),
match.group(3),
match.group(4),
- match.group(5)
+ match.group(5),
+ match.group(6),
+ int(match.group(7)),
+ int(match.group(8)),
+ match.group(9),
)
- if line.kind == "GET":
+ if entry.kind == "GET" and entry.code == 200:
try:
- count = urls[line.resource]
+ count = urls[entry.resource]
except KeyError:
count = 0
-
- urls[line.resource] = count + 1
+
+ urls[entry.resource] = count + 1
total_visits += 1
- seen.add(line.source)
-
+ seen.add(entry.source)
first_log_file = log_files[0]
logs_start_date = datetime.fromtimestamp(first_log_file.stat().st_ctime)
environment = Environment()
template = environment.from_string(TEMPLATE)
-print(template.render(
- seconds=seconds,
- logs_start_date=logs_start_date.ctime(),
- logs_days_ago=logs_days_ago,
- total_visits=total_visits,
- unique_visits=unique_visits,
- most_visited_site=most_visited_site
-))
+print(
+ template.render(
+ seconds=seconds,
+ logs_start_date=logs_start_date.ctime(),
+ logs_days_ago=logs_days_ago,
+ total_visits=total_visits,
+ unique_visits=unique_visits,
+ most_visited_site=most_visited_site,
+ )
+)