From: John Turner Date: Wed, 16 Jul 2025 03:01:13 +0000 (-0400) Subject: process line-by-line and add css styling to template X-Git-Url: https://jturnerusa.dev/gitweb/?a=commitdiff_plain;h=8d5fdbd8606e3a1c64286fe6cd2b70b18b5d392c;p=website process line-by-line and add css styling to template --- diff --git a/stats.py b/stats.py index 544e8fa..824bfa4 100755 --- a/stats.py +++ b/stats.py @@ -7,44 +7,53 @@ from dataclasses import dataclass from jinja2 import Environment from time import time -GET_PATTERN = r"^([^ ]+) ([^ ]+) - \[(.*)\] \"GET ([^ ]*)" +LINE_PATTERN = r"^([^ ]+) ([^ ]+) - \[(.*)\] \"([A-Z]+) ([^ ]+) HTTP/\d\.\d\"" TEMPLATE=""" - - - Most visited pages - - -

Stats

-

- Logs start at {{ logs_start_date }} ({{ logs_days_ago }} days ago) -

-

- Stats generated in {{ seconds }}s -

-

- Total visits: {{ total_visits }} -

-

- Unique visits: {{ unique_visits }} -

-

- Most visited url: {{ most_visited_site }} -

- - + + + Most visited pages + + + + +
+

Stats

+

+ Logs start at {{ logs_start_date }} ({{ logs_days_ago }} days ago) +

+

+ Stats generated in {{ seconds }}s +

+

+ Total visits: {{ total_visits }} +

+

+ Unique visits: {{ unique_visits }} +

+

+ Most visited url: {{ most_visited_site }} +

+
+ """ @dataclass -class Get: +class Line: source: str dest: str - url: str + time: str + kind: str + resource: str start_time = time() @@ -54,44 +63,47 @@ log_files = [log for log in logs_dir.iterdir() if log.is_file() and log.name.startswith("access.log")] -gets = [] +urls = {} +seen = set() +total_visits = 0 for file in log_files: if file.suffix == ".gz": - decoder = gzip.open(file) - text = decoder.read().decode() + reader = gzip.open(file, "rt") else: - text = file.read_text() - - for match in re.finditer(GET_PATTERN, text, flags=re.MULTILINE): - get = Get(match.group(1), match.group(2), match.group(4)) - gets.append(get) - -urls = {} - -for get in gets: - try: - count = urls[get.url] - except KeyError: - count = 0 - - urls[get.url] = count + 1 + reader = file.open() + + for line in reader: + if (match := re.match(LINE_PATTERN, line)): + line = Line( + match.group(1), + match.group(2), + match.group(3), + match.group(4), + match.group(5) + ) + + if line.kind == "GET": + try: + count = urls[line.resource] + except KeyError: + count = 0 + + urls[line.resource] = count + 1 + total_visits += 1 + seen.add(line.source) + first_log_file = log_files[0] logs_start_date = datetime.fromtimestamp(first_log_file.stat().st_ctime) logs_days_ago = (datetime.now() - logs_start_date).days - -total_visits = len(gets) -unique_visits = len(set(get.source for get in gets)) +unique_visits = len(seen) most_visited_site = sorted(urls.items(), key=lambda item: item[1])[-1][0] +seconds = round(time() - start_time, 3) environment = Environment() template = environment.from_string(TEMPLATE) -seconds = round(time() - start_time, 3) - -print("Content-Type: text/html\r\n") - print(template.render( seconds=seconds, logs_start_date=logs_start_date.ctime(),