diff options
author | John Turner <jturner.usa@gmail.com> | 2025-09-23 21:11:01 -0400 |
---|---|---|
committer | John Turner <jturner.usa@gmail.com> | 2025-09-23 21:48:01 -0400 |
commit | 83040f948f6e8264fb427b5d9cf338f46a906fe1 (patch) | |
tree | d0efab55a60a10c638df2e5b4194a060c9a85c38 | |
parent | 2aa14b931f1a4fa7f465537b5310217b84615203 (diff) | |
download | pypaste-83040f948f6e8264fb427b5d9cf338f46a906fe1.tar.gz |
change keys to 78-bit random tokens and humanize the result
Instead of picking random words via a random choice function, we
generate a random int of 13 * key_length bits as the key. We humanize the key by
splitting it up into 13-bit segments and using each segment as an index
into the word list.
This allows us to store the keys in binary form, which is faster and
uses less space.
-rw-r--r-- | meson.build | 4 | ||||
-rw-r--r-- | pypaste/server/__init__.py | 65 | ||||
-rw-r--r-- | pypaste/server/__main__.py | 8 | ||||
-rw-r--r-- | pypaste/server/s3/__init__.py | 28 | ||||
-rw-r--r-- | pypaste/server/sqlite/__init__.py | 37 | ||||
-rwxr-xr-x | tests/test_server.py | 23 | ||||
-rwxr-xr-x | tests/test_sqlite_storage.py | 26 |
7 files changed, 116 insertions, 75 deletions
diff --git a/meson.build b/meson.build index 13022e0..866a1c9 100644 --- a/meson.build +++ b/meson.build @@ -43,6 +43,10 @@ python = import('python').find_installation( python.install_sources(sources, preserve_path: true) if get_option('tests').enabled() + pytest = find_program('pytest') + + test('unit_tests', pytest, args: sources) + subdir('tests') endif diff --git a/pypaste/server/__init__.py b/pypaste/server/__init__.py index 5f34338..2ecd975 100644 --- a/pypaste/server/__init__.py +++ b/pypaste/server/__init__.py @@ -25,7 +25,7 @@ from pygments.lexers import guess_lexer, get_lexer_by_name from pygments.formatters import HtmlFormatter from pygments.styles import get_style_by_name from abc import abstractmethod -from secrets import choice +from secrets import randbits from pypaste import WORDLIST from functools import lru_cache @@ -54,13 +54,35 @@ def pygmentize( return highlight(content, lexer, formatter) -def keygen(length: int) -> str: - return "-".join(choice(WORDLIST) for _ in range(length)) +@dataclass +class Key: + data: bytes + length: int + + +def keygen(length: int) -> Key: + return Key(randbits(13 * length).to_bytes(13 * length // 8 + 1), length) + + +def humanize(key: Key) -> str: + return "-".join( + WORDLIST[(int.from_bytes(key.data) >> (13 * i)) & (2**13 - 1)] + for i in range(key.length) + ) + + +def dehumanize(s: str) -> Key: + words = s.split("-") + + data = 0 + for i, word in enumerate(words): + data |= WORDLIST.index(word) << (13 * i) + + return Key(data.to_bytes(13 * len(words) // 8 + 1), len(words)) @dataclass class Paste: - key: str dt: datetime syntax: Optional[str] text: str @@ -75,29 +97,29 @@ class Storage: pass @abstractmethod - async def insert(self, paste: Paste) -> None: + async def insert(self, paste: Paste, key: Key) -> None: pass @abstractmethod - async def retrieve(self, key: str) -> Optional[Paste]: + async def retrieve(self, key: Key) -> Optional[Paste]: pass @abstractmethod - async def delete(self, key) -> None: + async def 
delete(self, key: Key) -> None: pass @abstractmethod - async def exists(self, key: str) -> bool: + async def exists(self, key: Key) -> bool: pass @abstractmethod async def vacuum(self, size: int) -> None: pass - async def read_row(self, key: str) -> Optional[Tuple[datetime, int, Optional[str]]]: + async def read_row(self, key: Key) -> Optional[Tuple[datetime, int, Optional[str]]]: async with self.connection.execute( "select pastes.datetime,pastes.size,pastes.syntax from pastes where pastes.key=? limit 1", - (key,), + (key.data,), ) as cursor: match await cursor.fetchone(): case [str(dt), int(size), syntax]: @@ -131,7 +153,7 @@ class App: return web.HTTPBadRequest(text="provide a key to fetch") try: - paste = await self.storage.retrieve(key) + paste = await self.storage.retrieve(dehumanize(key)) except Exception as e: log_error(f"failed to retrieve paste {key}: {e}") return web.HTTPInternalServerError() @@ -188,24 +210,25 @@ class App: text="content must be unicode only, no binary data is allowed" ) - try: - while await self.storage.exists((key := keygen(self.config.key_length))): - pass - except Exception as e: - log_error(str(e)) - return web.HTTPInternalServerError() + key = keygen(self.config.key_length) try: - paste = Paste(key, datetime.now(), syntax, text) - await self.storage.insert(paste) + paste = Paste(datetime.now(), syntax, text) + await self.storage.insert(paste, key) except Exception as e: log_error(f"failed to insert paste {key} to storage: {e}") return web.HTTPInternalServerError() - url = f"{self.config.site}/paste/{key}" + url = f"{self.config.site}/paste/{humanize(key)}" log_info( - f"uploaded paste {key} with syntax {syntax} of size {len(data)} bytes: {url}" + f"uploaded paste {key.data.hex()} with syntax {syntax} of size {len(data)} bytes: {url}" ) return web.HTTPOk(text=url) + + +def test_humanize_dehumanize_roundtrip() -> None: + key = keygen(6) + + assert key == dehumanize(humanize(key)) diff --git a/pypaste/server/__main__.py 
b/pypaste/server/__main__.py index f5a0180..41b94b5 100644 --- a/pypaste/server/__main__.py +++ b/pypaste/server/__main__.py @@ -79,7 +79,13 @@ async def main() -> int: try: await connection.execute( ( - "create table if not exists pastes(key text, datetime text, size int, syntax text)" + "create table if not exists pastes(" + "key blob," + "key_length int," + "datetime text," + "size int," + "syntax text" + ")" ) ) await connection.commit() diff --git a/pypaste/server/s3/__init__.py b/pypaste/server/s3/__init__.py index 6026fc4..23be1ee 100644 --- a/pypaste/server/s3/__init__.py +++ b/pypaste/server/s3/__init__.py @@ -16,7 +16,7 @@ import asyncio import zstandard import aiosqlite -from pypaste.server import Storage, Paste +from pypaste.server import Storage, Paste, Key from pypaste.server.s3.bucket import Bucket from dataclasses import dataclass from typing import Optional @@ -39,28 +39,28 @@ class S3(Storage): self.bucket = Bucket(endpoint, region, bucket, access_key, secret_key) async def setup(self) -> None: - await self.connection.execute("create table if not exists s3(key text)") + await self.connection.execute("create table if not exists s3(key blob)") await self.connection.commit() - async def insert(self, paste: Paste) -> None: + async def insert(self, paste: Paste, key: Key) -> None: def compress(): return zstandard.compress(paste.text.encode()) compressed = await asyncio.to_thread(compress) await self.connection.execute( - "insert into pastes values(?, ?, ?, ?)", - (paste.key, paste.dt.isoformat(), len(compressed), paste.syntax), + "insert into pastes values(?, ?, ?, ?, ?)", + (key.data, key.length, paste.dt.isoformat(), len(compressed), paste.syntax), ) try: - await self.bucket.put(paste.key, compressed) + await self.bucket.put(key.data.hex(), compressed) await self.connection.commit() except Exception as e: await self.connection.rollback() raise e - async def retrieve(self, key: str) -> Optional[Paste]: + async def retrieve(self, key: Key) -> 
Optional[Paste]: if not await self.exists(key): return None @@ -70,7 +70,7 @@ class S3(Storage): (dt, size, syntax) = row - data = await self.bucket.get(key) + data = await self.bucket.get(key.data.hex()) assert data is not None @@ -79,21 +79,21 @@ class S3(Storage): text = await asyncio.to_thread(decompress) - return Paste(key, dt, syntax, text) + return Paste(dt, syntax, text) - async def delete(self, key: str) -> None: - await self.connection.execute("delete from pastes where key=?", (key,)) + async def delete(self, key: Key) -> None: + await self.connection.execute("delete from pastes where key=?", (key.data,)) try: - await self.bucket.delete(key) + await self.bucket.delete(key.data.hex()) await self.connection.commit() except Exception as e: await self.connection.rollback() raise e - async def exists(self, key: str) -> bool: + async def exists(self, key: Key) -> bool: async with self.connection.execute( - "select 1 from s3 where key=?", (key,) + "select 1 from s3 where key=?", (key.data,) ) as cursor: return await cursor.fetchone() is not None diff --git a/pypaste/server/sqlite/__init__.py b/pypaste/server/sqlite/__init__.py index c09e385..ee75d91 100644 --- a/pypaste/server/sqlite/__init__.py +++ b/pypaste/server/sqlite/__init__.py @@ -1,7 +1,7 @@ import asyncio import zstandard import aiosqlite -from pypaste.server import Storage, Paste +from pypaste.server import Storage, Paste, Key from dataclasses import dataclass from typing import Optional @@ -12,42 +12,41 @@ class Sqlite(Storage): async def setup(self) -> None: await self.connection.execute( - "create table if not exists sqlite(key text, data blob)" + "create table if not exists sqlite(key blob, data blob)" ) await self.connection.commit() - async def insert(self, paste: Paste) -> None: + async def insert(self, paste: Paste, key: Key) -> None: def compress(): return zstandard.compress(paste.text.encode()) data = await asyncio.to_thread(compress) await self.connection.execute( - "insert into pastes 
values(?, ?, ?, ?)", - (paste.key, paste.dt.isoformat(), len(data), paste.syntax), + "insert into pastes values(?, ?, ?, ?, ?)", + (key.data, key.length, paste.dt.isoformat(), len(data), paste.syntax), ) await self.connection.execute( "insert into sqlite values(?, ?)", ( - paste.key, + key.data, data, ), ) await self.connection.commit() - async def retrieve(self, key: str) -> Optional[Paste]: - if not await self.exists(key): - return None - + async def retrieve(self, key: Key) -> Optional[Paste]: async with self.connection.execute( - "select sqlite.data from sqlite where key=? limit 1", (key,) + "select sqlite.data from sqlite where key=? limit 1", (key.data,) ) as cursor: match await cursor.fetchone(): case [bytes(data)]: pass + case None: + return None case _: raise Exception("unreachable") @@ -62,16 +61,16 @@ class Sqlite(Storage): text = await asyncio.to_thread(decompress) - return Paste(key, dt, syntax, text) + return Paste(dt, syntax, text) - async def delete(self, key: str) -> None: - await self.connection.execute("delete from pastes where key=?", (key,)) + async def delete(self, key: Key) -> None: + await self.connection.execute("delete from pastes where key=?", (key.data,)) - await self.connection.execute("delete from sqlite where key=?", (key,)) + await self.connection.execute("delete from sqlite where key=?", (key.data,)) - async def exists(self, key: str) -> bool: + async def exists(self, key: Key) -> bool: async with self.connection.execute( - "select 1 from sqlite where key=?", (key,) + "select 1 from sqlite where key=?", (key.data,) ) as cursor: return await cursor.fetchone() is not None @@ -91,7 +90,7 @@ class Sqlite(Storage): async with self.connection.execute( ( - "select pastes.key from pastes " + "select pastes.key, pastes.key_length from pastes " "inner join sqlite on sqlite.key " "where pastes.key=sqlite.key " "order by pastes.datetime " @@ -101,7 +100,7 @@ class Sqlite(Storage): if (row := await cursor.fetchone()) is None: return else: - 
oldest = row[0] + oldest = Key(row[0], row[1]) if use > max: await self.delete(oldest) diff --git a/tests/test_server.py b/tests/test_server.py index 0ccfc5b..2b1ed2e 100755 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -7,6 +7,7 @@ import string import random import aiohttp from pathlib import Path +from asyncio import Queue def truncate(file: Path) -> None: @@ -20,10 +21,21 @@ def generate_name() -> str: return "".join(random.choice(characters) for _ in range(10)) +async def tee_pipe(proc: asyncio.subprocess.Process, queue: Queue[str]) -> None: + assert proc.stdout is not None + + while (line := await proc.stdout.readline()) is not None: + sys.stderr.write(line.decode()) + await queue.put(line.decode()) + + async def main() -> int: - with tempfile.TemporaryDirectory() as tmpdir: + with tempfile.TemporaryDirectory(delete=False) as tmpdir: + print(tmpdir, file=sys.stderr) + socket = Path(tmpdir) / (generate_name() + ".sock") database = Path(tmpdir) / generate_name() + queue: Queue[str] = Queue() truncate(database) @@ -38,7 +50,7 @@ async def main() -> int: "--content-length-max-bytes", "200000", "--key-length", - "3", + "6", "--database", database, "--storage-max-bytes", @@ -48,12 +60,9 @@ async def main() -> int: stderr=asyncio.subprocess.STDOUT, ) - assert proc.stdout is not None - - line = await proc.stdout.readline() + asyncio.create_task(tee_pipe(proc, queue)) - if b"starting" not in line: - print(line, file=sys.stderr) + if "starting" not in await queue.get(): return 1 connection = aiohttp.UnixConnector(path=str(socket)) diff --git a/tests/test_sqlite_storage.py b/tests/test_sqlite_storage.py index 384158c..6a93561 100755 --- a/tests/test_sqlite_storage.py +++ b/tests/test_sqlite_storage.py @@ -6,7 +6,7 @@ import tempfile import aiosqlite import string import random -from pypaste.server import Paste +from pypaste.server import Paste, Key, keygen from pypaste.server.sqlite import Sqlite from datetime import datetime from pathlib import Path @@ 
-24,11 +24,11 @@ def generate_key() -> str: async def test_exists_but_not_in_our_table(storage: Sqlite) -> None: - key = generate_key() + key = keygen(6) await storage.connection.execute( - "insert into pastes values(?, ?, ?, ?)", - (key, datetime.now().isoformat(), None, bytes()), + "insert into pastes values(?, ?, ?, ?, ?)", + (key.data, key.length, datetime.now().isoformat(), None, bytes()), ) assert not await storage.exists(key) @@ -36,20 +36,20 @@ async def test_exists_but_not_in_our_table(storage: Sqlite) -> None: async def test_exists(storage: Sqlite) -> None: dt = datetime.now() - key = generate_key() + key = keygen(6) - await storage.insert(Paste(key, dt, "test", "hello world")) + await storage.insert(Paste(dt, "test", "hello world"), key) assert await storage.exists(key) - assert not await storage.exists(generate_key()) + assert not await storage.exists(keygen(6)) async def test_delete(storage: Sqlite) -> None: dt = datetime.now() - key = generate_key() + key = keygen(6) - await storage.insert(Paste(key, dt, "test", "hello world")) + await storage.insert(Paste(dt, "test", "hello world"), key) assert await storage.exists(key) @@ -60,14 +60,13 @@ async def test_delete(storage: Sqlite) -> None: async def test_insert_retrieve(storage: Sqlite) -> None: dt = datetime.now() - key = generate_key() + key = keygen(6) - await storage.insert(Paste(key, dt, "test", "hello world")) + await storage.insert(Paste(dt, "test", "hello world"), key) paste = await storage.retrieve(key) assert paste is not None - assert paste.key == key assert paste.dt == dt assert paste.syntax == "test" assert paste.text == "hello world" @@ -81,7 +80,8 @@ async def main() -> int: await connection.execute( ( "create table pastes(" - "key text," + "key blob," + "key_length int," "datetime text," "size int," "syntax text" |