summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn Turner <jturner.usa@gmail.com>2025-09-23 21:11:01 -0400
committerJohn Turner <jturner.usa@gmail.com>2025-09-23 21:48:01 -0400
commit83040f948f6e8264fb427b5d9cf338f46a906fe1 (patch)
treed0efab55a60a10c638df2e5b4194a060c9a85c38
parent2aa14b931f1a4fa7f465537b5310217b84615203 (diff)
downloadpypaste-83040f948f6e8264fb427b5d9cf338f46a906fe1.tar.gz
change keys to 78-bit random tokens and humanize the result
Instead of picking random words via a random choice function, we generate a random integer of 13 * key_length bits as the key. We humanize the key by splitting it into 13-bit segments and using each segment as an index into the word list. This allows us to store the keys in binary form, which is faster and uses less space.
-rw-r--r--meson.build4
-rw-r--r--pypaste/server/__init__.py65
-rw-r--r--pypaste/server/__main__.py8
-rw-r--r--pypaste/server/s3/__init__.py28
-rw-r--r--pypaste/server/sqlite/__init__.py37
-rwxr-xr-xtests/test_server.py23
-rwxr-xr-xtests/test_sqlite_storage.py26
7 files changed, 116 insertions, 75 deletions
diff --git a/meson.build b/meson.build
index 13022e0..866a1c9 100644
--- a/meson.build
+++ b/meson.build
@@ -43,6 +43,10 @@ python = import('python').find_installation(
python.install_sources(sources, preserve_path: true)
if get_option('tests').enabled()
+ pytest = find_program('pytest')
+
+ test('unit_tests', pytest, args: sources)
+
subdir('tests')
endif
diff --git a/pypaste/server/__init__.py b/pypaste/server/__init__.py
index 5f34338..2ecd975 100644
--- a/pypaste/server/__init__.py
+++ b/pypaste/server/__init__.py
@@ -25,7 +25,7 @@ from pygments.lexers import guess_lexer, get_lexer_by_name
from pygments.formatters import HtmlFormatter
from pygments.styles import get_style_by_name
from abc import abstractmethod
-from secrets import choice
+from secrets import randbits
from pypaste import WORDLIST
from functools import lru_cache
@@ -54,13 +54,35 @@ def pygmentize(
return highlight(content, lexer, formatter)
-def keygen(length: int) -> str:
- return "-".join(choice(WORDLIST) for _ in range(length))
+@dataclass
+class Key:
+ data: bytes
+ length: int
+
+
+def keygen(length: int) -> Key:
+ return Key(randbits(13 * length).to_bytes(13 * length // 8 + 1), length)
+
+
+def humanize(key: Key) -> str:
+ return "-".join(
+ WORDLIST[(int.from_bytes(key.data) >> (13 * i)) & (2**13 - 1)]
+ for i in range(key.length)
+ )
+
+
+def dehumanize(s: str) -> Key:
+ words = s.split("-")
+
+ data = 0
+ for i, word in enumerate(words):
+ data |= WORDLIST.index(word) << (13 * i)
+
+ return Key(data.to_bytes(13 * len(words) // 8 + 1), len(words))
@dataclass
class Paste:
- key: str
dt: datetime
syntax: Optional[str]
text: str
@@ -75,29 +97,29 @@ class Storage:
pass
@abstractmethod
- async def insert(self, paste: Paste) -> None:
+ async def insert(self, paste: Paste, key: Key) -> None:
pass
@abstractmethod
- async def retrieve(self, key: str) -> Optional[Paste]:
+ async def retrieve(self, key: Key) -> Optional[Paste]:
pass
@abstractmethod
- async def delete(self, key) -> None:
+ async def delete(self, key: Key) -> None:
pass
@abstractmethod
- async def exists(self, key: str) -> bool:
+ async def exists(self, key: Key) -> bool:
pass
@abstractmethod
async def vacuum(self, size: int) -> None:
pass
- async def read_row(self, key: str) -> Optional[Tuple[datetime, int, Optional[str]]]:
+ async def read_row(self, key: Key) -> Optional[Tuple[datetime, int, Optional[str]]]:
async with self.connection.execute(
"select pastes.datetime,pastes.size,pastes.syntax from pastes where pastes.key=? limit 1",
- (key,),
+ (key.data,),
) as cursor:
match await cursor.fetchone():
case [str(dt), int(size), syntax]:
@@ -131,7 +153,7 @@ class App:
return web.HTTPBadRequest(text="provide a key to fetch")
try:
- paste = await self.storage.retrieve(key)
+ paste = await self.storage.retrieve(dehumanize(key))
except Exception as e:
log_error(f"failed to retrieve paste {key}: {e}")
return web.HTTPInternalServerError()
@@ -188,24 +210,25 @@ class App:
text="content must be unicode only, no binary data is allowed"
)
- try:
- while await self.storage.exists((key := keygen(self.config.key_length))):
- pass
- except Exception as e:
- log_error(str(e))
- return web.HTTPInternalServerError()
+ key = keygen(self.config.key_length)
try:
- paste = Paste(key, datetime.now(), syntax, text)
- await self.storage.insert(paste)
+ paste = Paste(datetime.now(), syntax, text)
+ await self.storage.insert(paste, key)
except Exception as e:
log_error(f"failed to insert paste {key} to storage: {e}")
return web.HTTPInternalServerError()
- url = f"{self.config.site}/paste/{key}"
+ url = f"{self.config.site}/paste/{humanize(key)}"
log_info(
- f"uploaded paste {key} with syntax {syntax} of size {len(data)} bytes: {url}"
+ f"uploaded paste {key.data.hex()} with syntax {syntax} of size {len(data)} bytes: {url}"
)
return web.HTTPOk(text=url)
+
+
+def test_humanize_dehumanize_roundtrip() -> None:
+ key = keygen(6)
+
+ assert key == dehumanize(humanize(key))
diff --git a/pypaste/server/__main__.py b/pypaste/server/__main__.py
index f5a0180..41b94b5 100644
--- a/pypaste/server/__main__.py
+++ b/pypaste/server/__main__.py
@@ -79,7 +79,13 @@ async def main() -> int:
try:
await connection.execute(
(
- "create table if not exists pastes(key text, datetime text, size int, syntax text)"
+ "create table if not exists pastes("
+ "key blob,"
+ "key_length int,"
+ "datetime text,"
+ "size int,"
+ "syntax text"
+ ")"
)
)
await connection.commit()
diff --git a/pypaste/server/s3/__init__.py b/pypaste/server/s3/__init__.py
index 6026fc4..23be1ee 100644
--- a/pypaste/server/s3/__init__.py
+++ b/pypaste/server/s3/__init__.py
@@ -16,7 +16,7 @@
import asyncio
import zstandard
import aiosqlite
-from pypaste.server import Storage, Paste
+from pypaste.server import Storage, Paste, Key
from pypaste.server.s3.bucket import Bucket
from dataclasses import dataclass
from typing import Optional
@@ -39,28 +39,28 @@ class S3(Storage):
self.bucket = Bucket(endpoint, region, bucket, access_key, secret_key)
async def setup(self) -> None:
- await self.connection.execute("create table if not exists s3(key text)")
+ await self.connection.execute("create table if not exists s3(key blob)")
await self.connection.commit()
- async def insert(self, paste: Paste) -> None:
+ async def insert(self, paste: Paste, key: Key) -> None:
def compress():
return zstandard.compress(paste.text.encode())
compressed = await asyncio.to_thread(compress)
await self.connection.execute(
- "insert into pastes values(?, ?, ?, ?)",
- (paste.key, paste.dt.isoformat(), len(compressed), paste.syntax),
+ "insert into pastes values(?, ?, ?, ?, ?)",
+ (key.data, key.length, paste.dt.isoformat(), len(compressed), paste.syntax),
)
try:
- await self.bucket.put(paste.key, compressed)
+ await self.bucket.put(key.data.hex(), compressed)
await self.connection.commit()
except Exception as e:
await self.connection.rollback()
raise e
- async def retrieve(self, key: str) -> Optional[Paste]:
+ async def retrieve(self, key: Key) -> Optional[Paste]:
if not await self.exists(key):
return None
@@ -70,7 +70,7 @@ class S3(Storage):
(dt, size, syntax) = row
- data = await self.bucket.get(key)
+ data = await self.bucket.get(key.data.hex())
assert data is not None
@@ -79,21 +79,21 @@ class S3(Storage):
text = await asyncio.to_thread(decompress)
- return Paste(key, dt, syntax, text)
+ return Paste(dt, syntax, text)
- async def delete(self, key: str) -> None:
- await self.connection.execute("delete from pastes where key=?", (key,))
+ async def delete(self, key: Key) -> None:
+ await self.connection.execute("delete from pastes where key=?", (key.data,))
try:
- await self.bucket.delete(key)
+ await self.bucket.delete(key.data.hex())
await self.connection.commit()
except Exception as e:
await self.connection.rollback()
raise e
- async def exists(self, key: str) -> bool:
+ async def exists(self, key: Key) -> bool:
async with self.connection.execute(
- "select 1 from s3 where key=?", (key,)
+ "select 1 from s3 where key=?", (key.data,)
) as cursor:
return await cursor.fetchone() is not None
diff --git a/pypaste/server/sqlite/__init__.py b/pypaste/server/sqlite/__init__.py
index c09e385..ee75d91 100644
--- a/pypaste/server/sqlite/__init__.py
+++ b/pypaste/server/sqlite/__init__.py
@@ -1,7 +1,7 @@
import asyncio
import zstandard
import aiosqlite
-from pypaste.server import Storage, Paste
+from pypaste.server import Storage, Paste, Key
from dataclasses import dataclass
from typing import Optional
@@ -12,42 +12,41 @@ class Sqlite(Storage):
async def setup(self) -> None:
await self.connection.execute(
- "create table if not exists sqlite(key text, data blob)"
+ "create table if not exists sqlite(key blob, data blob)"
)
await self.connection.commit()
- async def insert(self, paste: Paste) -> None:
+ async def insert(self, paste: Paste, key: Key) -> None:
def compress():
return zstandard.compress(paste.text.encode())
data = await asyncio.to_thread(compress)
await self.connection.execute(
- "insert into pastes values(?, ?, ?, ?)",
- (paste.key, paste.dt.isoformat(), len(data), paste.syntax),
+ "insert into pastes values(?, ?, ?, ?, ?)",
+ (key.data, key.length, paste.dt.isoformat(), len(data), paste.syntax),
)
await self.connection.execute(
"insert into sqlite values(?, ?)",
(
- paste.key,
+ key.data,
data,
),
)
await self.connection.commit()
- async def retrieve(self, key: str) -> Optional[Paste]:
- if not await self.exists(key):
- return None
-
+ async def retrieve(self, key: Key) -> Optional[Paste]:
async with self.connection.execute(
- "select sqlite.data from sqlite where key=? limit 1", (key,)
+ "select sqlite.data from sqlite where key=? limit 1", (key.data,)
) as cursor:
match await cursor.fetchone():
case [bytes(data)]:
pass
+ case None:
+ return None
case _:
raise Exception("unreachable")
@@ -62,16 +61,16 @@ class Sqlite(Storage):
text = await asyncio.to_thread(decompress)
- return Paste(key, dt, syntax, text)
+ return Paste(dt, syntax, text)
- async def delete(self, key: str) -> None:
- await self.connection.execute("delete from pastes where key=?", (key,))
+ async def delete(self, key: Key) -> None:
+ await self.connection.execute("delete from pastes where key=?", (key.data,))
- await self.connection.execute("delete from sqlite where key=?", (key,))
+ await self.connection.execute("delete from sqlite where key=?", (key.data,))
- async def exists(self, key: str) -> bool:
+ async def exists(self, key: Key) -> bool:
async with self.connection.execute(
- "select 1 from sqlite where key=?", (key,)
+ "select 1 from sqlite where key=?", (key.data,)
) as cursor:
return await cursor.fetchone() is not None
@@ -91,7 +90,7 @@ class Sqlite(Storage):
async with self.connection.execute(
(
- "select pastes.key from pastes "
+ "select pastes.key, pastes.key_length from pastes "
"inner join sqlite on sqlite.key "
"where pastes.key=sqlite.key "
"order by pastes.datetime "
@@ -101,7 +100,7 @@ class Sqlite(Storage):
if (row := await cursor.fetchone()) is None:
return
else:
- oldest = row[0]
+ oldest = Key(row[0], row[1])
if use > max:
await self.delete(oldest)
diff --git a/tests/test_server.py b/tests/test_server.py
index 0ccfc5b..2b1ed2e 100755
--- a/tests/test_server.py
+++ b/tests/test_server.py
@@ -7,6 +7,7 @@ import string
import random
import aiohttp
from pathlib import Path
+from asyncio import Queue
def truncate(file: Path) -> None:
@@ -20,10 +21,21 @@ def generate_name() -> str:
return "".join(random.choice(characters) for _ in range(10))
+async def tee_pipe(proc: asyncio.subprocess.Process, queue: Queue[str]) -> None:
+ assert proc.stdout is not None
+
+ while (line := await proc.stdout.readline()) is not None:
+ sys.stderr.write(line.decode())
+ await queue.put(line.decode())
+
+
async def main() -> int:
- with tempfile.TemporaryDirectory() as tmpdir:
+ with tempfile.TemporaryDirectory(delete=False) as tmpdir:
+ print(tmpdir, file=sys.stderr)
+
socket = Path(tmpdir) / (generate_name() + ".sock")
database = Path(tmpdir) / generate_name()
+ queue: Queue[str] = Queue()
truncate(database)
@@ -38,7 +50,7 @@ async def main() -> int:
"--content-length-max-bytes",
"200000",
"--key-length",
- "3",
+ "6",
"--database",
database,
"--storage-max-bytes",
@@ -48,12 +60,9 @@ async def main() -> int:
stderr=asyncio.subprocess.STDOUT,
)
- assert proc.stdout is not None
-
- line = await proc.stdout.readline()
+ asyncio.create_task(tee_pipe(proc, queue))
- if b"starting" not in line:
- print(line, file=sys.stderr)
+ if "starting" not in await queue.get():
return 1
connection = aiohttp.UnixConnector(path=str(socket))
diff --git a/tests/test_sqlite_storage.py b/tests/test_sqlite_storage.py
index 384158c..6a93561 100755
--- a/tests/test_sqlite_storage.py
+++ b/tests/test_sqlite_storage.py
@@ -6,7 +6,7 @@ import tempfile
import aiosqlite
import string
import random
-from pypaste.server import Paste
+from pypaste.server import Paste, Key, keygen
from pypaste.server.sqlite import Sqlite
from datetime import datetime
from pathlib import Path
@@ -24,11 +24,11 @@ def generate_key() -> str:
async def test_exists_but_not_in_our_table(storage: Sqlite) -> None:
- key = generate_key()
+ key = keygen(6)
await storage.connection.execute(
- "insert into pastes values(?, ?, ?, ?)",
- (key, datetime.now().isoformat(), None, bytes()),
+ "insert into pastes values(?, ?, ?, ?, ?)",
+ (key.data, key.length, datetime.now().isoformat(), None, bytes()),
)
assert not await storage.exists(key)
@@ -36,20 +36,20 @@ async def test_exists_but_not_in_our_table(storage: Sqlite) -> None:
async def test_exists(storage: Sqlite) -> None:
dt = datetime.now()
- key = generate_key()
+ key = keygen(6)
- await storage.insert(Paste(key, dt, "test", "hello world"))
+ await storage.insert(Paste(dt, "test", "hello world"), key)
assert await storage.exists(key)
- assert not await storage.exists(generate_key())
+ assert not await storage.exists(keygen(6))
async def test_delete(storage: Sqlite) -> None:
dt = datetime.now()
- key = generate_key()
+ key = keygen(6)
- await storage.insert(Paste(key, dt, "test", "hello world"))
+ await storage.insert(Paste(dt, "test", "hello world"), key)
assert await storage.exists(key)
@@ -60,14 +60,13 @@ async def test_delete(storage: Sqlite) -> None:
async def test_insert_retrieve(storage: Sqlite) -> None:
dt = datetime.now()
- key = generate_key()
+ key = keygen(6)
- await storage.insert(Paste(key, dt, "test", "hello world"))
+ await storage.insert(Paste(dt, "test", "hello world"), key)
paste = await storage.retrieve(key)
assert paste is not None
- assert paste.key == key
assert paste.dt == dt
assert paste.syntax == "test"
assert paste.text == "hello world"
@@ -81,7 +80,8 @@ async def main() -> int:
await connection.execute(
(
"create table pastes("
- "key text,"
+ "key blob,"
+ "key_length int,"
"datetime text,"
"size int,"
"syntax text"